diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index c3e74390646..ef554a1b0ff 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -157,7 +157,8 @@ jobs: ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: - if: ${{ !failure() && !cancelled() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderDebAarch64 @@ -177,7 +178,8 @@ jobs: run_command: | python3 build_report_check.py "$CHECK_NAME" BuilderSpecialReport: - if: ${{ !failure() && !cancelled() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderBinDarwin diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 432a9df5369..d2865eb737d 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -262,6 +262,8 @@ jobs: ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderBinRelease @@ -272,7 +274,6 @@ jobs: - BuilderDebRelease - BuilderDebTsan - BuilderDebUBsan - if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check @@ -285,7 +286,8 @@ jobs: run_command: | python3 build_report_check.py "$CHECK_NAME" BuilderSpecialReport: - if: ${{ !failure() && !cancelled() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderBinAarch64 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 08a4ab99520..bd2b2b60904 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -291,6 +291,8 @@ jobs: ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderBinRelease @@ -301,7 +303,6 @@ jobs: - BuilderDebRelease - BuilderDebTsan - BuilderDebUBsan - if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check @@ -314,7 +315,8 @@ jobs: run_command: | python3 build_report_check.py "$CHECK_NAME" BuilderSpecialReport: - if: ${{ !failure() && !cancelled() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderBinAarch64 diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index fa8e93369b3..69229ef75df 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -172,6 +172,8 @@ jobs: ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderDebRelease @@ -181,7 +183,6 @@ 
jobs: - BuilderDebUBsan - BuilderDebMsan - BuilderDebDebug - if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check @@ -194,7 +195,8 @@ jobs: run_command: | python3 build_report_check.py "$CHECK_NAME" BuilderSpecialReport: - if: ${{ !failure() && !cancelled() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - RunConfig - BuilderDebRelease diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index b1bc64c1f69..e6aa04a3569 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -76,6 +76,8 @@ jobs: run: | python3 "$GITHUB_WORKSPACE/tests/ci/build_check.py" "$BUILD_NAME" - name: Post + # there is still a build report to upload for a failed build job + if: always() run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}' - name: Mark as done diff --git a/.gitmessage b/.gitmessage new file mode 100644 index 00000000000..f4a25a837bc --- /dev/null +++ b/.gitmessage @@ -0,0 +1,10 @@ + + +## To avoid merge commit in CI run (add a leading space to apply): +#no-merge-commit + +## Running specified job (add a leading space to apply): +#job_ +#job_stateless_tests_release +#job_package_debug +#job_integration_tests_asan diff --git a/.gitmodules b/.gitmodules index 53ef899dd99..3b9faea3cc1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -360,3 +360,6 @@ [submodule "contrib/sqids-cpp"] path = contrib/sqids-cpp url = https://github.com/sqids/sqids-cpp.git +[submodule "contrib/idna"] + path = contrib/idna + url = https://github.com/ada-url/idna.git diff --git a/base/poco/Foundation/include/Poco/StreamUtil.h b/base/poco/Foundation/include/Poco/StreamUtil.h index fa1814a0f2e..ed0a4fb5154 100644 --- a/base/poco/Foundation/include/Poco/StreamUtil.h +++ b/base/poco/Foundation/include/Poco/StreamUtil.h @@ -69,6 +69,9 @@ // init() is called in the MyIOS constructor. // Therefore we replace each call to init() with // the poco_ios_init macro defined below. +// +// Also this macro will adjust exceptions() flags, since by default std::ios +// will hide exceptions, while in ClickHouse it is better to pass them through.
#if !defined(POCO_IOS_INIT_HACK) @@ -79,7 +82,10 @@ #if defined(POCO_IOS_INIT_HACK) # define poco_ios_init(buf) #else -# define poco_ios_init(buf) init(buf) +# define poco_ios_init(buf) do { \ + init(buf); \ + this->exceptions(std::ios::failbit | std::ios::badbit); \ +} while (0) #endif diff --git a/base/poco/Foundation/include/Poco/UTF32Encoding.h b/base/poco/Foundation/include/Poco/UTF32Encoding.h index e6784e787cc..dafac005e83 100644 --- a/base/poco/Foundation/include/Poco/UTF32Encoding.h +++ b/base/poco/Foundation/include/Poco/UTF32Encoding.h @@ -70,6 +70,15 @@ public: int queryConvert(const unsigned char * bytes, int length) const; int sequenceLength(const unsigned char * bytes, int length) const; +protected: + static int safeToInt(Poco::UInt32 value) + { + if (value <= 0x10FFFF) + return static_cast<int>(value); + else + return -1; + } + private: bool _flipBytes; static const char * _names[]; diff --git a/base/poco/Foundation/src/UTF32Encoding.cpp b/base/poco/Foundation/src/UTF32Encoding.cpp index ff07006a4fb..e600c5d9445 100644 --- a/base/poco/Foundation/src/UTF32Encoding.cpp +++ b/base/poco/Foundation/src/UTF32Encoding.cpp @@ -30,22 +30,22 @@ const char* UTF32Encoding::_names[] = const TextEncoding::CharacterMap UTF32Encoding::_charMap = { - /* 00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 20 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 30 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 40 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 50 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 60 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 70 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* a0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* b0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* e0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* f0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + /* 00 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 10 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 20 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 30 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 40 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 50 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 60 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 70 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 80 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 90 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* a0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* b0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* c0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* d0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* e0 */ -4, -4, -4, -4, -4, -4, -4,
-4, -4, -4, -4, -4, -4, -4, -4, -4, + /* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, }; @@ -118,7 +118,7 @@ const TextEncoding::CharacterMap& UTF32Encoding::characterMap() const int UTF32Encoding::convert(const unsigned char* bytes) const { UInt32 uc; - unsigned char* p = (unsigned char*) &uc; + unsigned char* p = reinterpret_cast<unsigned char*>(&uc); *p++ = *bytes++; *p++ = *bytes++; *p++ = *bytes++; @@ -129,7 +129,7 @@ int UTF32Encoding::convert(const unsigned char* bytes) const ByteOrder::flipBytes(uc); } - return uc; + return safeToInt(uc); } @@ -138,7 +138,7 @@ int UTF32Encoding::convert(int ch, unsigned char* bytes, int length) const if (bytes && length >= 4) { UInt32 ch1 = _flipBytes ? ByteOrder::flipBytes((UInt32) ch) : (UInt32) ch; - unsigned char* p = (unsigned char*) &ch1; + unsigned char* p = reinterpret_cast<unsigned char*>(&ch1); *bytes++ = *p++; *bytes++ = *p++; *bytes++ = *p++; @@ -155,14 +155,14 @@ int UTF32Encoding::queryConvert(const unsigned char* bytes, int length) const if (length >= 4) { UInt32 uc; - unsigned char* p = (unsigned char*) &uc; + unsigned char* p = reinterpret_cast<unsigned char*>(&uc); *p++ = *bytes++; *p++ = *bytes++; *p++ = *bytes++; *p++ = *bytes++; if (_flipBytes) ByteOrder::flipBytes(uc); - return uc; + ret = safeToInt(uc); } return ret; diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1b5ba15187f..02cb19d4c07 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx) add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) +add_contrib (idna-cmake idna) add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (incbin-cmake incbin) add_contrib (sqids-cpp-cmake sqids-cpp) diff --git a/contrib/azure b/contrib/azure index 352ff0a61cb..060c54dfb0a 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 352ff0a61cb319ac1cc38c4058443ddf70147530 +Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3 diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index bb44c993e79..0d2512c9e6e 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -8,37 +8,21 @@ endif() set(AZURE_DIR "${ClickHouse_SOURCE_DIR}/contrib/azure") set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk") -file(GLOB AZURE_SDK_CORE_SRC +file(GLOB AZURE_SDK_SRC "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.hpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/winhttp/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/io/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/private/*.hpp" -) - -file(GLOB AZURE_SDK_IDENTITY_SRC + "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/tracing/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/private/*.hpp" -) - -file(GLOB AZURE_SDK_STORAGE_COMMON_SRC - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/private/*.cpp" -) - -file(GLOB AZURE_SDK_STORAGE_BLOBS_SRC "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.hpp" +
"${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.cpp" + "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp" ) file(GLOB AZURE_SDK_UNIFIED_SRC - ${AZURE_SDK_CORE_SRC} - ${AZURE_SDK_IDENTITY_SRC} - ${AZURE_SDK_STORAGE_COMMON_SRC} - ${AZURE_SDK_STORAGE_BLOBS_SRC} + ${AZURE_SDK_SRC} ) set(AZURE_SDK_INCLUDES diff --git a/contrib/boringssl b/contrib/boringssl index 8061ac62d67..aa6d2f865a2 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit 8061ac62d67953e61b793042e33baf1352e67510 +Subproject commit aa6d2f865a2eab01cf94f197e11e36b6de47b5b4 diff --git a/contrib/idna b/contrib/idna new file mode 160000 index 00000000000..3c8be01d42b --- /dev/null +++ b/contrib/idna @@ -0,0 +1 @@ +Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667 diff --git a/contrib/idna-cmake/CMakeLists.txt b/contrib/idna-cmake/CMakeLists.txt new file mode 100644 index 00000000000..1138b836192 --- /dev/null +++ b/contrib/idna-cmake/CMakeLists.txt @@ -0,0 +1,24 @@ +option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES}) +if ((NOT ENABLE_IDNA)) + message (STATUS "Not using idna") + return() +endif() +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna") + +set (SRCS + "${LIBRARY_DIR}/src/idna.cpp" + "${LIBRARY_DIR}/src/mapping.cpp" + "${LIBRARY_DIR}/src/mapping_tables.cpp" + "${LIBRARY_DIR}/src/normalization.cpp" + "${LIBRARY_DIR}/src/normalization_tables.cpp" + "${LIBRARY_DIR}/src/punycode.cpp" + "${LIBRARY_DIR}/src/to_ascii.cpp" + "${LIBRARY_DIR}/src/to_unicode.cpp" + "${LIBRARY_DIR}/src/unicode_transcoding.cpp" + "${LIBRARY_DIR}/src/validity.cpp" +) + +add_library (_idna ${SRCS}) +target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include") + +add_library (ch_contrib::idna ALIAS _idna) diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 406bac73e90..d09060912d8 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -11,7 +11,9 @@ option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during quer option (ENABLE_DWARF_PARSER "Enable support for DWARF input format (uses LLVM library)" ${ENABLE_DWARF_PARSER_DEFAULT}) -if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER) +option (ENABLE_BLAKE3 "Enable BLAKE3 function" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER AND NOT ENABLE_BLAKE3) message(STATUS "Not using LLVM") return() endif() @@ -26,61 +28,75 @@ set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") # and llvm cannot be compiled with bundled libcxx and 20 standard. set (CMAKE_CXX_STANDARD 14) -# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
-set (REQUIRED_LLVM_LIBRARIES - LLVMExecutionEngine - LLVMRuntimeDyld - LLVMAsmPrinter - LLVMDebugInfoDWARF - LLVMGlobalISel - LLVMSelectionDAG - LLVMMCDisassembler - LLVMPasses - LLVMCodeGen - LLVMipo - LLVMBitWriter - LLVMInstrumentation - LLVMScalarOpts - LLVMAggressiveInstCombine - LLVMInstCombine - LLVMVectorize - LLVMTransformUtils - LLVMTarget - LLVMAnalysis - LLVMProfileData - LLVMObject - LLVMBitReader - LLVMCore - LLVMRemarks - LLVMBitstreamReader - LLVMMCParser - LLVMMC - LLVMBinaryFormat - LLVMDebugInfoCodeView - LLVMSupport - LLVMDemangle -) +if (ARCH_AMD64) + set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") +elseif (ARCH_AARCH64) + set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") +elseif (ARCH_PPC64LE) + set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") +elseif (ARCH_S390X) + set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") +elseif (ARCH_RISCV64) + set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") +endif () + + +if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER) + # Only compiling blake3 + set (REQUIRED_LLVM_LIBRARIES LLVMSupport) +else() + # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. + set (REQUIRED_LLVM_LIBRARIES + LLVMExecutionEngine + LLVMRuntimeDyld + LLVMAsmPrinter + LLVMDebugInfoDWARF + LLVMGlobalISel + LLVMSelectionDAG + LLVMMCDisassembler + LLVMPasses + LLVMCodeGen + LLVMipo + LLVMBitWriter + LLVMInstrumentation + LLVMScalarOpts + LLVMAggressiveInstCombine + LLVMInstCombine + LLVMVectorize + LLVMTransformUtils + LLVMTarget + LLVMAnalysis + LLVMProfileData + LLVMObject + LLVMBitReader + LLVMCore + LLVMRemarks + LLVMBitstreamReader + LLVMMCParser + LLVMMC + LLVMBinaryFormat + LLVMDebugInfoCodeView + LLVMSupport + LLVMDemangle + ) + + if (ARCH_AMD64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) + elseif (ARCH_AARCH64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) + elseif (ARCH_PPC64LE) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) + elseif (ARCH_S390X) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) + elseif (ARCH_RISCV64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) + endif () +endif() + # Skip useless "install" instructions from CMake: set (LLVM_INSTALL_TOOLCHAIN_ONLY 1 CACHE INTERNAL "") -if (ARCH_AMD64) - set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) -elseif (ARCH_AARCH64) - set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) -elseif (ARCH_PPC64LE) - set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) -elseif (ARCH_S390X) - set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) -elseif (ARCH_RISCV64) - set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) -endif () - message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}") set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind diff --git a/docker/keeper/Dockerfile 
b/docker/keeper/Dockerfile index 06bb3f2cdda..59170af8edf 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.11.2.11" +ARG VERSION="23.11.3.23" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index e7b0d4e15e5..b577775277e 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.11.2.11" +ARG VERSION="23.11.3.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 8cb4bf94ac9..6dc764bd0b9 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.11.2.11" +ARG VERSION="23.11.3.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 45e55e7a79c..4255a529f6d 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -34,7 +34,7 @@ services: # Empty container to run proxy resolver. resolver: - image: clickhouse/python-bottle + image: clickhouse/python-bottle:${DOCKER_PYTHON_BOTTLE_TAG:-latest} expose: - "8080" tty: true diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 806b57c4616..c9ce5697182 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -40,6 +40,12 @@ if [ "$cache_policy" = "SLRU" ]; then mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml fi +if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + # It is not needed, we will explicitly create tables on s3. + # We do not have stateful tests with s3 storage running in the public repository, but this is needed for another repository.
+ rm /etc/clickhouse-server/config.d/s3_storage_policy_for_merge_tree_by_default.xml +fi + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -123,8 +129,76 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] else clickhouse-client --query "CREATE DATABASE test" clickhouse-client --query "SHOW TABLES FROM test" - clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" - clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" + if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, + EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, + UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, + RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), + URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, + FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, + UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, + MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, + SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, + ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, + FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, + HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, + GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, + HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, + HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, + FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, + LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, + RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, + ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, + OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, + UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, + URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, + ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), + IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY 
toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, + VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, + Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, + EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, + AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), + RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, + SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, + ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, + SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, + UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, + FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, + FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, + Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, + BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), + Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), + WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, + ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, + ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, + ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, + ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, + ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, + OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, + UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, + PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, + PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), + CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, + StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, + OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, 
OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, + UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, + ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), + Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, + DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) + ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) + SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + + clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" + clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" + else + clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" + clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" + fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, 
ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi @@ -144,6 +218,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--replicated-database') fi + if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--s3-storage') + fi + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') fi diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index bfa9f9938ab..4e9486d7286 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -58,6 +58,7 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/users.d/s3_cache_new.xml + rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml fi # For flaky check we also enable thread fuzzer @@ -216,11 +217,11 @@ export -f run_tests if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
- timeout "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 1204434d853..9b6ab535a90 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -35,4 +35,17 @@ function fn_exists() { declare -F "$1" > /dev/null; } +function timeout_with_logging() { + local exit_code=0 + + timeout "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + # vi: ft=bash diff --git a/docs/changelogs/v23.11.3.23-stable.md b/docs/changelogs/v23.11.3.23-stable.md new file mode 100644 index 00000000000..7fcc65beb54 --- /dev/null +++ b/docs/changelogs/v23.11.3.23-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.11.3.23-stable (a14ab450b0e) FIXME as compared to v23.11.2.11-stable (6e5411358c8) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). +* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)). +* Revert "Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631)" [#58031](https://github.com/ClickHouse/ClickHouse/pull/58031) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL CATEGORY + +* Backported in [#57918](https://github.com/ClickHouse/ClickHouse/issues/57918):. [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)). +* Always use `pread` for reading cache segments [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)). 
+ diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index b024820024a..5e81eacc937 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -67,7 +67,6 @@ Engines in the family: Engines in the family: - [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) - [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) - [Merge](../../engines/table-engines/special/merge.md#merge) - [File](../../engines/table-engines/special/file.md#file) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 9af857b0835..44febe78c77 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -212,5 +212,5 @@ ORDER BY key ASC ``` ### More information on Joins -- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#settings-join_algorithm) +- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#join_algorithm) - [JOIN clause](/docs/en/sql-reference/statements/select/join.md) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 19221c256f9..96e6bab6997 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -236,7 +236,7 @@ libhdfs3 support HDFS namenode HA. ## Storage Settings {#storage-settings} -- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index de1a090d491..141d87fed20 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -54,7 +54,7 @@ Optional parameters: - `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`. -- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `kafka_max_block_size` — The maximum batch size (in messages) for poll. 
Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`. - `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`. - `kafka_client_id` — Client identifier. Empty by default. @@ -151,7 +151,7 @@ Example: SELECT level, sum(total) FROM daily GROUP BY level; ``` -To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings.md/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. +To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings.md/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. To stop receiving topic data or to change the conversion logic, detach the materialized view: diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index 37a41159fab..e898d1f1b82 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -58,7 +58,7 @@ Optional parameters: - `nats_reconnect_wait` – Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`. - `nats_server_list` - Server list for connection. Can be specified to connect to NATS cluster. - `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). -- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `nats_flush_interval_ms` - Timeout for flushing data read from NATS. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `nats_username` - NATS username. - `nats_password` - NATS password. diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 53c6e089a70..0f3fef3d6fb 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -65,7 +65,7 @@ Optional parameters: - `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified. 
- `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`. - `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). Default: `0`. -- `rabbitmq_max_block_size` - Number of row collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `rabbitmq_max_block_size` - Number of row collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `rabbitmq_flush_interval_ms` - Timeout for flushing data from RabbitMQ. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue. - `rabbitmq_address` - Address for connection. Use ether this setting or `rabbitmq_host_port`. diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 3144bdd32fa..dfa06801d04 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -222,7 +222,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ## Storage Settings {#storage-settings} -- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 97d37e476ae..23d98d4b20e 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -12,7 +12,7 @@ In most cases you do not need a partition key, and in most other cases you do no You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. ::: -Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. 
+Partitioning is available for the [MergeTree family tables](../../../engines/table-engines/mergetree-family/mergetree.md), including [replicated tables](../../../engines/table-engines/mergetree-family/replication.md) and [materialized views](../../../sql-reference/statements/create/view.md#materialized-view). A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. Partitions improve performance for queries containing a partitioning key because ClickHouse will filter for that partition before selecting the parts and granules within the partition. diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 6224c450ea2..de8ae0357dc 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -112,7 +112,7 @@ Specifying the `sharding_key` is necessary for the following: For **Insert limit settings** (`..._insert`) see also: - [distributed_foreground_insert](../../../operations/settings/settings.md#distributed_foreground_insert) setting -- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting +- [prefer_localhost_replica](../../../operations/settings/settings.md#prefer-localhost-replica) setting - `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert` ::: @@ -198,7 +198,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com - `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `9440` and be configured with correct certificates. - `compression` - Use data compression. Default value: `true`. -When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. +When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. 
This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard. @@ -243,7 +243,7 @@ If the server ceased to exist or had a rough restart (for example, due to a hard When querying a `Distributed` table, `SELECT` queries are sent to all shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. -When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas). +When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#max_parallel_replicas). To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation. diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 6e3897398a5..fdf5242ba3b 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -101,8 +101,8 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Settings {#settings} -- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. +- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. - [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. +- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. 
The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. diff --git a/docs/en/engines/table-engines/special/filelog.md b/docs/en/engines/table-engines/special/filelog.md index eef9a17444e..82201053bc5 100644 --- a/docs/en/engines/table-engines/special/filelog.md +++ b/docs/en/engines/table-engines/special/filelog.md @@ -41,7 +41,7 @@ Optional parameters: - `poll_timeout_ms` - Timeout for single poll from log file. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms). - `poll_max_batch_size` — Maximum amount of records to be polled in a single poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). -- `max_block_size` — The maximum batch size (in records) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `max_block_size` — The maximum batch size (in records) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `max_threads` - Number of max threads to parse files, default is 0, which means the number will be max(1, physical_cpu_cores / 4). - `poll_directory_watch_events_backoff_init` - The initial sleep value for watch directory thread. Default: `500`. - `poll_directory_watch_events_backoff_max` - The max sleep value for watch directory thread. Default: `32000`. diff --git a/docs/en/engines/table-engines/special/materializedview.md b/docs/en/engines/table-engines/special/materializedview.md deleted file mode 100644 index d5f3b364d4e..00000000000 --- a/docs/en/engines/table-engines/special/materializedview.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -slug: /en/engines/table-engines/special/materializedview -sidebar_position: 100 -sidebar_label: MaterializedView ---- - -# MaterializedView Table Engine - -Used for implementing materialized views (for more information, see [CREATE VIEW](../../../sql-reference/statements/create/view.md#materialized)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 63f75fb7830..4eeb19cefcf 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -167,7 +167,7 @@ For successful requests that do not return a data table, an empty response body You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed. -You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting. +You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. 
To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#http_native_compression_disable_checksumming_on_decompress) setting. If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method. @@ -183,7 +183,7 @@ You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP - `snappy` To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. -In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods. +In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#http_zlib_compression_level) setting for all compression methods. :::info Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. @@ -285,7 +285,7 @@ For information about other parameters, see the section “SET”. Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to add the `session_id` GET parameter to the request. You can use any string as the session ID. By default, the session is terminated after 60 seconds of inactivity. To change this timeout, modify the `default_session_timeout` setting in the server configuration, or add the `session_timeout` GET parameter to the request. To check the session status, use the `session_check=1` parameter. Only one query at a time can be executed within a single session. -You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence: +You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#send_progress_in_http_headers). Example of the header sequence: ``` text X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334"} @@ -496,7 +496,7 @@ Next are the configuration methods for different `type`. `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration. -The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. 
+The following example defines the values of [max_threads](../operations/settings/settings.md#max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. :::note To keep the default `handlers` such as` query`, `play`,` ping`, add the `` rule. @@ -539,7 +539,7 @@ In `dynamic_query_handler`, the query is written in the form of parameter of the ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the parameter is not passed in. -To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and `queries` whether the settings were set successfully. +To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#max_threads) and `max_final_threads` and `queries` whether the settings were set successfully. Example: diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md index e60aff927c4..0e09ab6a0b7 100644 --- a/docs/en/interfaces/overview.md +++ b/docs/en/interfaces/overview.md @@ -25,6 +25,7 @@ ClickHouse server provides embedded visual interfaces for power users: - Play UI: open `/play` in the browser; - Advanced Dashboard: open `/dashboard` in the browser; +- Binary symbols viewer for ClickHouse engineers: open `/binary` in the browser; There are also a wide range of third-party libraries for working with ClickHouse: diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index adc384e21ae..de61da6f5c4 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -64,4 +64,4 @@ You can configure ClickHouse to export metrics to [Prometheus](https://prometheu Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`. -To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. +To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. 
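A minimal sketch of how the replica-lag setting referenced above can be applied at session level (the 300-second value is only an example, not a recommendation from this patch):

``` sql
-- Treat a replica as too stale for distributed queries once it lags more than 300 seconds.
SET max_replica_delay_for_distributed_queries = 300;
```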
diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 206f710734e..194d2714422 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -42,7 +42,7 @@ To analyze the `trace_log` system table: - Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). -- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. +- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#allow_introspection_functions) setting. For security reasons, introspection functions are disabled by default. diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index def0f48b968..50c5ff4457f 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -29,6 +29,10 @@ Transactionally inconsistent caching is traditionally provided by client tools o the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance effort and avoids redundancy. +:::note +Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results. +::: + ## Configuration Settings and Usage Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the @@ -99,7 +103,7 @@ It is also possible to limit the cache usage of individual users using [settings constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may allocate in the query cache and the maximum number of stored query results. For that, first provide configurations [query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and -[query_cache_max_entries](settings/settings.md#query-cache-size-max-entries) in a user profile in `users.xml`, then make both settings +[query_cache_max_entries](settings/settings.md#query-cache-max-entries) in a user profile in `users.xml`, then make both settings readonly: ``` xml @@ -140,7 +144,7 @@ value can be specified at session, profile or query level using setting [query_c Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries). -ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#settings-max_block_size) rows. Due to filtering, aggregation, +ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. 
Due to filtering, aggregation, etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks are squashed (if they are tiny) or split (if they are large) into blocks of 'max_block_size' size before insertion into the query result diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 01e30c84526..48434d992e2 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -2009,7 +2009,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to ## query_thread_log {#query_thread_log} -Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting. +Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#log-query-threads) setting. Queries are logged in the [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). @@ -2051,7 +2051,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the ## query_views_log {#query_views_log} -Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views) setting. +Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#log-query-views) setting. Queries are logged in the [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). @@ -2331,7 +2331,7 @@ For the value of the `incl` attribute, see the section “[Configuration files]( **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [skip_unavailable_shards](../../operations/settings/settings.md#skip_unavailable_shards) - [Cluster Discovery](../../operations/cluster-discovery.md) - [Replicated database engine](../../engines/database-engines/replicated.md) diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 9e36aa26946..1cb7ec9dced 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -139,7 +139,7 @@ Limit on the number of bytes in the result. The same as the previous setting. What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’. By default, throw. -Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max_threads](../../operations/settings/settings.md#settings-max_threads). +Using ‘break’ is similar to using LIMIT. 
`Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max_threads](../../operations/settings/settings.md#max_threads). Example: diff --git a/docs/en/operations/settings/settings.md index dc3baf09d00..6e087467bb9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -460,6 +460,12 @@ Possible values: Default value: 1048576. +## http_make_head_request {#http-make-head-request} + +The `http_make_head_request` setting allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size. Since it's enabled by default, it may be desirable to disable this setting in cases where the server does not support `HEAD` requests. + +Default value: `true`. + ## table_function_remote_max_addresses {#table_function_remote_max_addresses} Sets the maximum number of addresses generated from patterns for the [remote](../../sql-reference/table-functions/remote.md) function. @@ -1710,7 +1716,7 @@ Default value: `1` ## query_cache_squash_partial_results {#query-cache-squash-partial-results} -Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressability of cache entries (see [query_cache_compress-entries](#query_cache_compress_entries)). +Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressibility of cache entries (see [query_cache_compress_entries](#query-cache-compress-entries)). Possible values: @@ -2480,7 +2486,7 @@ See also: - [load_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) - [distributed_replica_error_cap](#distributed_replica_error_cap) -- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) +- [distributed_replica_error_half_life](#distributed_replica_error_half_life) ## distributed_background_insert_sleep_time_ms {#distributed_background_insert_sleep_time_ms} @@ -4158,6 +4164,41 @@ Result: └─────┴─────┴───────┘ ``` +## enable_order_by_all {#enable-order-by-all} + +Enables or disables sorting with the `ORDER BY ALL` syntax, see [ORDER BY](../../sql-reference/statements/select/order-by.md). + +Possible values: + +- 0 — Disable ORDER BY ALL. +- 1 — Enable ORDER BY ALL. + +Default value: `1`. + +**Example** + +Query: + +```sql +CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory(); + +INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20); + +SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous + +SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all = 0; +``` + +Result: + +```text +┌─C1─┬─C2─┬─ALL─┐ +│ 20 │ 20 │ 10 │ +│ 30 │ 10 │ 20 │ +│ 10 │ 20 │ 30 │ +└────┴────┴─────┘ +``` + ## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string} Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array. @@ -4674,7 +4715,7 @@ Possible values: Default value: `false`.
-## rename_files_after_processing +## rename_files_after_processing {#rename_files_after_processing} - **Type:** String @@ -5093,3 +5134,25 @@ When set to `true` than for all s3 requests first two attempts are made with low When set to `false` than all attempts are made with identical timeouts. Default value: `true`. + +## max_partition_size_to_drop + +Restriction on dropping partitions at query time. + +Default value: 50 GB. +The value 0 means that you can drop partitions without any restrictions. + +:::note +This query setting overrides its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop) +::: + +## max_table_size_to_drop + +Restriction on deleting tables at query time. + +Default value: 50 GB. +The value 0 means that you can delete all tables without any restrictions. + +:::note +This query setting overrides its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop) +::: \ No newline at end of file diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index e46b495239c..90c5a7d2e7a 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -239,6 +239,10 @@ The amount of virtual memory mapped for the pages of machine code of the server The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes. It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call. This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring. +### MemoryResidentMax + +Maximum amount of physical memory used by the server process, in bytes. + ### MemoryResident The amount of physical memory used by the server process, in bytes. diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 2659f80e338..63cc083e4bc 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -78,5 +78,5 @@ is_active: NULL **See Also** - [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) +- [distributed_replica_error_cap setting](../../operations/settings/settings.md#distributed_replica_error_cap) +- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#distributed_replica_error_half_life) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 4f5e214f1ce..7fcc4928355 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -11,7 +11,7 @@ This table does not contain the ingested data for `INSERT` queries. You can change settings of queries logging in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
-You can disable queries logging by setting [log_queries = 0](../../operations/settings/settings.md#settings-log-queries). We do not recommend to turn off logging because information in this table is important for solving issues. +You can disable queries logging by setting [log_queries = 0](../../operations/settings/settings.md#log-queries). We do not recommend to turn off logging because information in this table is important for solving issues. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. @@ -30,7 +30,7 @@ Each query creates one or two rows in the `query_log` table, depending on the st You can use the [log_queries_probability](../../operations/settings/settings.md#log-queries-probability) setting to reduce the number of queries, registered in the `query_log` table. -You can use the [log_formatted_queries](../../operations/settings/settings.md#settings-log-formatted-queries) setting to log formatted queries to the `formatted_query` column. +You can use the [log_formatted_queries](../../operations/settings/settings.md#log-formatted-queries) setting to log formatted queries to the `formatted_query` column. Columns: @@ -101,7 +101,7 @@ Columns: - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. - `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) - `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. -- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined. +- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#max_query_size). An empty string if it is not defined. - `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously. - `peak_threads_usage` ([UInt64)](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query. - `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution. diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index a198d7c304f..0420a0392f2 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -8,7 +8,7 @@ Contains information about threads that execute queries, for example, thread nam To start logging: 1. 
Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section. -2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1. +2. Set [log_query_threads](../../operations/settings/settings.md#log-query-threads) to 1. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index 4dd8dd7420d..41a69da70aa 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -8,7 +8,7 @@ Contains information about the dependent views executed when running a query, fo To start logging: 1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section. -2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1. +2. Set [log_query_views](../../operations/settings/settings.md#log-query-views) to 1. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 08594739ecf..56668abae31 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -14,7 +14,7 @@ This table contains the following columns (the column type is shown in brackets) - `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. - `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). - `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. -- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). +- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#max-insert-threads) setting). Example: diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index e4461e14236..8049ab091c0 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -29,7 +29,7 @@ Columns: - `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. 
-- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table). +- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([materialized views](../../sql-reference/statements/create/view.md#materialized-view) the current table). - `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table. @@ -57,6 +57,8 @@ Columns: - If the table stores data on disk, returns used space on disk (i.e. compressed). - If the table stores data in memory, returns approximated number of used bytes in memory. +- `total_bytes_uncompressed` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of uncompressed bytes, if it's possible to quickly determine the exact number of bytes from the part checksums for the table on storage, otherwise `NULL` (does not take underlying storage (if any) into account). + - `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). - `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index a40108a331a..ca4067c8d8c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -28,7 +28,7 @@ In both cases the type of the returned value is [UInt64](../../../sql-reference/ **Details** -ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function. +ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function. The `SELECT count() FROM table` query is optimized by default using metadata from MergeTree. If you need to use row-level security, disable optimization using the [optimize_trivial_count_query](../../../operations/settings/settings.md#optimize-trivial-count-query) setting. 
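A hedged sketch of how the `count_distinct_implementation` setting above can be chosen per query; the table and column names (`hits`, `UserID`) are placeholders, not taken from this patch:

``` sql
-- Use an approximate uniq variant for COUNT(DISTINCT ...) instead of the default uniqExact.
SELECT count(DISTINCT UserID)
FROM hits
SETTINGS count_distinct_implementation = 'uniqCombined';
```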
diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 4f021b25809..9f86aaf2502 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -394,7 +394,7 @@ Configuration example: or ``` sql -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY([SHARDS 1])) ``` ### complex_key_hashed_array @@ -412,7 +412,7 @@ Configuration example: or ``` sql -LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1])) ``` ### range_hashed {#range_hashed} @@ -2415,8 +2415,8 @@ clickhouse client \ --secure \ --password MY_PASSWORD \ --query " - INSERT INTO regexp_dictionary_source_table - SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)') + INSERT INTO regexp_dictionary_source_table + SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)') FORMAT CSV" < regexp_dict.csv ``` diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 00efa63c960..f5da00a8663 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -143,7 +143,7 @@ range([start, ] end [, step]) **Implementation details** - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. -- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. +- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#function_range_max_elements_in_block) setting. - Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type). **Examples** diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 8cb35483555..1025b8bdc3d 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -16,7 +16,7 @@ For proper operation of introspection functions: - Install the `clickhouse-common-static-dbg` package. -- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1. +- Set the [allow_introspection_functions](../../operations/settings/settings.md#allow_introspection_functions) setting to 1. For security reasons introspection functions are disabled by default. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 5b9d01985dd..35f9c7af2ce 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2831,3 +2831,92 @@ Result: │ SELECT a, b FROM tab WHERE (a > 3) AND (b < 3) │ └─────────────────────────────────────────────────────────────────────────┘ ``` + +## minSampleSizeConversion + +Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples. 
+ +**Syntax** + +``` sql +minSampleSizeConversion(baseline, mde, power, alpha) +``` + +Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the sample size required for one group (i.e. the sample size required for the whole experiment is twice the returned value). + +**Arguments** + +- `baseline` — Baseline conversion. [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage points (e.g. for a baseline conversion 0.25 the MDE 0.03 means an expected change to 0.25 ± 0.03). [Float](../data-types/float.md). +- `power` — Required statistical power of a test (1 - probability of Type II error). [Float](../data-types/float.md). +- `alpha` — Required significance level of a test (probability of Type I error). [Float](../data-types/float.md). + +**Returned value** + +A named [Tuple](../data-types/tuple.md) with 3 elements: + +- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md). +- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline - mde`. [Float64](../data-types/float.md). +- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline + mde`. [Float64](../data-types/float.md). + +**Example** + +The following query calculates the required sample size for an A/B test with baseline conversion of 25%, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: + +``` sql +SELECT minSampleSizeConversion(0.25, 0.03, 0.80, 0.05) AS sample_size; +``` + +Result: + +``` text +┌─sample_size───────────────────┐ +│ (3396.077603219163,0.22,0.28) │ +└───────────────────────────────┘ +``` + +## minSampleSizeContinuous + +Calculates minimum required sample size for an A/B test comparing means of a continuous metric in two samples. + +**Syntax** + +``` sql +minSampleSizeContinous(baseline, sigma, mde, power, alpha) +``` + +Alias: `minSampleSizeContinous` + +Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the required sample size for one group (i.e. the sample size required for the whole experiment is twice the returned value). Also assumes equal variance of the test metric in treatment and control groups. + +**Arguments** + +- `baseline` — Baseline value of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `sigma` — Baseline standard deviation of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `power` — Required statistical power of a test (1 - probability of Type II error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `alpha` — Required significance level of a test (probability of Type I error). 
[Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). + +**Returned value** + +A named [Tuple](../data-types/tuple.md) with 3 elements: + +- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md). +- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 - mde)`. [Float64](../data-types/float.md). +- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 + mde)`. [Float64](../data-types/float.md). + +**Example** + +The following query calculates the required sample size for an A/B test on a metric with baseline value of 112.25, standard deviation of 21.1, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: + +``` sql +SELECT minSampleSizeContinous(112.25, 21.1, 0.03, 0.80, 0.05) AS sample_size; +``` + +Result: + +``` text +┌─sample_size───────────────────────────┐ +│ (616.2931945826209,108.8825,115.6175) │ +└───────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 20694211912..5bad7d53e62 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1383,6 +1383,71 @@ Result: └──────────────────┘ ``` +## punycodeEncode + +Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string. +The string must be UTF8-encoded, otherwise results are undefined. + +**Syntax** + +``` sql +punycodeEncode(val) +``` + +**Arguments** + +- `val` - Input value. [String](../data-types/string.md) + +**Returned value** + +- A Punycode representation of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select punycodeEncode('München'); +``` + +Result: + +```result +┌─punycodeEncode('München')─┐ +│ Mnchen-3ya │ +└───────────────────────────┘ +``` + +## punycodeDecode + +Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string. + +**Syntax** + +``` sql +punycodeDecode(val) +``` + +**Arguments** + +- `val` - Punycode-encoded string. [String](../data-types/string.md) + +**Returned value** + +- The plaintext of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select punycodeDecode('Mnchen-3ya'); +``` + +Result: + +```result +┌─punycodeDecode('Mnchen-3ya')─┐ +│ München │ +└──────────────────────────────┘ +``` + ## byteHammingDistance Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
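As a quick usage note for the two Punycode functions documented above, encoding and decoding are expected to round-trip for valid UTF8 input (a sketch, not part of the official reference examples):

``` sql
-- Expected to return 'München' and 1.
SELECT
    punycodeDecode(punycodeEncode('München')) AS roundtrip,
    punycodeDecode(punycodeEncode('München')) = 'München' AS ok;
```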
diff --git a/docs/en/sql-reference/statements/alter/apply-deleted-mask.md b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md new file mode 100644 index 00000000000..7a11d66e739 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md @@ -0,0 +1,22 @@ +--- +slug: /en/sql-reference/statements/alter/apply-deleted-mask +sidebar_position: 46 +sidebar_label: APPLY DELETED MASK +--- + +# Apply mask of deleted rows + +``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] APPLY DELETED MASK [IN PARTITION partition_id] +``` + +The command applies the mask created by [lightweight delete](/docs/en/sql-reference/statements/delete) and forcefully removes rows marked as deleted from disk. This command is a heavyweight mutation, and it is semantically equivalent to the query ```ALTER TABLE [db].name DELETE WHERE _row_exists = 0```. + +:::note +It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). +::: + +**See also** + +- [Lightweight deletes](/docs/en/sql-reference/statements/delete) +- [Heavyweight deletes](/docs/en/sql-reference/statements/alter/delete.md) diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index d28542e0a43..dc6668c7983 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -17,8 +17,9 @@ Most `ALTER TABLE` queries modify table settings or data: - [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) - [TTL](/docs/en/sql-reference/statements/alter/ttl.md) - [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md) +- [APPLY DELETED MASK](/docs/en/sql-reference/statements/alter/apply-deleted-mask.md) -:::note +:::note Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md). ::: @@ -59,7 +60,7 @@ For all `ALTER` queries, you can use the [alter_sync](/docs/en/operations/settin You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](/docs/en/operations/settings/settings.md/#replication-wait-for-inactive-replica-timeout) setting. -:::note +:::note For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. ::: diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index e0cc98c2351..f9d93305071 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ Inserts data into a table. **Syntax** ``` sql -INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] [SETTINGS ...] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` You can specify a list of columns to insert using the `(c1, c2, c3)`.
You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data **Syntax** ``` sql -INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] [SETTINGS ...] [FORMAT format_name] ``` Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 07b5a196096..b5fc0a23745 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -17,7 +17,7 @@ This query tries to initialize an unscheduled merge of data parts for tables. No OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] ``` -The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. +The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family (including [materialized views](../../sql-reference/statements/create/view.md#materialized-view)) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `2`) or on current replica (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `1`). diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index a4f449ad321..06742ff74e2 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -34,7 +34,7 @@ Queries that use `FINAL` are executed slightly slower than similar queries that - Data is merged during query execution. - Queries with `FINAL` read primary key columns in addition to the columns specified in the query. -**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). +`FINAL` requires additional compute and memory resources, as the processing that normally would occur at merge time must occur in memory at the time of the query. 
However, using `FINAL` is sometimes necessary in order to produce accurate results, and is less expensive than running `OPTIMIZE` to force a merge. It is also sometimes possible to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). If you need to use `FINAL` in your queries in order to get the required results, then it is okay to do so, but be aware of the additional processing required. `FINAL` can be applied automatically using the [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 281a1d0436c..0529be06b5d 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -43,22 +43,23 @@ Additional join types available in ClickHouse: - `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product. - `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. - `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. +- `PASTE JOIN`, which performs a horizontal concatenation of two tables. :::note -When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). +When [join_algorithm](../../../operations/settings/settings.md#join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). ::: ## Settings -The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting. +The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#join_default_strictness) setting. The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. **See also** -- [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) -- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row) +- [join_algorithm](../../../operations/settings/settings.md#join_algorithm) +- [join_any_take_last_row](../../../operations/settings/settings.md#join_any_take_last_row) - [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls) - [partial_merge_join_optimizations](../../../operations/settings/settings.md#partial_merge_join_optimizations) - [partial_merge_join_rows_in_right_blocks](../../../operations/settings/settings.md#partial_merge_join_rows_in_right_blocks) @@ -269,6 +270,33 @@ For example, consider the following tables: `ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. ::: +## PASTE JOIN Usage + +The result of `PASTE JOIN` is a table that contains all columns from the left subquery followed by all columns from the right subquery.
+The rows are matched based on their positions in the original tables (the order of rows should be defined). +If the subqueries return a different number of rows, extra rows will be cut. + +Example: +```SQL +SELECT * +FROM +( + SELECT number AS a + FROM numbers(2) +) AS t1 +PASTE JOIN +( + SELECT number AS a + FROM numbers(2) + ORDER BY a DESC +) AS t2 + +┌─a─┬─t2.a─┐ +│ 0 │ 1 │ +│ 1 │ 0 │ +└───┴──────┘ +``` + ## Distributed JOIN There are two ways to execute join involving distributed tables: @@ -352,7 +380,7 @@ If you need a `JOIN` for joining with dimension tables (these are relatively sma ### Memory Limitations -By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the right_table and creates a hash table for it in RAM. If `join_algorithm = 'auto'` is enabled, then after some threshold of memory consumption, ClickHouse falls back to [merge](https://en.wikipedia.org/wiki/Sort-merge_join) join algorithm. For `JOIN` algorithms description see the [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) setting. +By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the right_table and creates a hash table for it in RAM. If `join_algorithm = 'auto'` is enabled, then after some threshold of memory consumption, ClickHouse falls back to [merge](https://en.wikipedia.org/wiki/Sort-merge_join) join algorithm. For `JOIN` algorithms description see the [join_algorithm](../../../operations/settings/settings.md#join_algorithm) setting. If you need to restrict `JOIN` operation memory consumption use the following settings: diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 53bdc9041a1..d6432a7b4f8 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -5,12 +5,22 @@ sidebar_label: ORDER BY # ORDER BY Clause -The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`. Sorting is case-sensitive. +The `ORDER BY` clause contains -If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). +- a list of expressions, e.g. `ORDER BY visits, search_phrase`, +- a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or +- `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`. -Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time). -If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well. +To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0. +To disable sorting by `ALL`, set setting [enable_order_by_all](../../../operations/settings/settings.md#enable-order-by-all) = 0. 
+ +The `ORDER BY` clause can be attributed with a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction. +Unless an explicit sort order is specified, `ASC` is used by default. +The sorting direction applies to a single expression, not to the entire list, e.g. `ORDER BY Visits DESC, SearchPhrase`. +Also, sorting is performed case-sensitively. + +Rows with identical values for the sort expressions are returned in an arbitrary and non-deterministic order. +If the `ORDER BY` clause is omitted in a `SELECT` statement, the row order is also arbitrary and non-deterministic. ## Sorting of Special Values @@ -265,8 +275,9 @@ Consider disabling `optimize_read_in_order` manually, when running queries that Optimization is supported in the following table engines: -- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) -- [Merge](../../../engines/table-engines/special/merge.md), [Buffer](../../../engines/table-engines/special/buffer.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) table engines over `MergeTree`-engine tables +- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (including [materialized views](../../../sql-reference/statements/create/view.md#materialized-view)), +- [Merge](../../../engines/table-engines/special/merge.md), +- [Buffer](../../../engines/table-engines/special/buffer.md) In `MaterializedView`-engine tables the optimization works with views like `SELECT ... FROM merge_tree_table ORDER BY pk`. But it is not supported in the queries like `SELECT ... FROM view ORDER BY pk` if the view query does not have the `ORDER BY` clause. diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index f5651c2dcb6..6dcb3e75e48 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -16,7 +16,7 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). -Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. +Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. When using the `Values` format in an `INSERT` query, it may seem that data is parsed the same as expressions in a `SELECT` query, but this is not true. The `Values` format is much more limited.
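As a minimal illustration of the `max_query_size` limit mentioned above (the value is arbitrary and only a sketch), the limit can be raised for a session when unusually long query texts are expected:

``` sql
-- Allow the server to keep up to 1 MiB of the query text in RAM while parsing.
SET max_query_size = 1048576;
```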
diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index a083c6b89a6..ad92ab39183 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -55,5 +55,5 @@ Connection settings like `host`, `port`, `user`, `password`, `compression`, `sec **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) -- [load_balancing](../../operations/settings/settings.md#settings-load_balancing) +- [skip_unavailable_shards](../../operations/settings/settings.md#skip_unavailable_shards) +- [load_balancing](../../operations/settings/settings.md#load_balancing) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index f0de4a405a0..3a63811add6 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -199,11 +199,11 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 ## Settings {#settings} -- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. +- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. - [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. +- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. **See Also** diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 463632f4e07..92f904b8841 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -100,7 +100,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin ## Storage Settings {#storage-settings} -- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. 
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. - [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs. diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 3ca177050d3..228f4a4c7e1 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -165,5 +165,5 @@ The following pattern types are supported. - `{0n..0m}` - A range of numbers with leading zeroes. This pattern preserves leading zeroes in indices. For instance, `example{01..03}-1` generates `example01-1`, `example02-1` and `example03-1`. - `{a|b}` - Any number of variants separated by a `|`. The pattern specifies replicas. For instance, `example01-{1|2}` generates replicas `example01-1` and `example01-2`. -The query will be sent to the first healthy replica. However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting. +The query will be sent to the first healthy replica. However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#load_balancing) setting. The number of generated addresses is limited by [table_function_remote_max_addresses](../../operations/settings/settings.md#table_function_remote_max_addresses) setting. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index dc11259c626..8065f066666 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -16,7 +16,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer **Syntax** ``` sql -s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` :::tip GCS @@ -38,6 +38,8 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ ::: - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. @@ -236,7 +238,7 @@ LIMIT 5; ## Storage Settings {#storage-settings} -- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. 
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 799eb31446a..080c9860519 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -10,14 +10,15 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) and Google **Syntax** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key, [session_token]] [,format] [,structure]) ``` **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 7deef68f47f..fe40cb0c507 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **Синтаксис** ``` sql -s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` **Aргументы** diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md index b8f34d805ff..b382bf5e384 100644 --- a/docs/ru/sql-reference/table-functions/s3Cluster.md +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -11,14 +11,14 @@ sidebar_label: s3Cluster **Синтаксис** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key [,session_token]] [,format] [,structure]) ``` **Аргументы** - `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам. - `source` — URL файла или нескольких файлов. Поддерживает следующие символы подстановки: `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. 
+- `access_key_id`, `secret_access_key` и `session_token` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 01f702a4b1e..3286fc9f9e7 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -61,6 +61,22 @@ sidebar_label: ORDER BY 我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。 +## ORDER BY ALL + +`ORDER BY ALL` 对所有选定的列进行升序排序。 + +示例: + +``` sql +SELECT a, b, c FROM t ORDER BY ALL +``` + +等同于: + +``` sql +SELECT a, b, c FROM t ORDER BY a, b, c +``` + ## 实现细节 {#implementation-details} 更少的RAM使用,如果一个足够小 [LIMIT](../../../sql-reference/statements/select/limit.md) 除了指定 `ORDER BY`. 否则,所花费的内存量与用于排序的数据量成正比。 对于分布式查询处理,如果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 省略排序,在远程服务器上部分完成排序,并将结果合并到请求者服务器上。 这意味着对于分布式排序,要排序的数据量可以大于单个服务器上的内存量。 diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index a62fa9ebb19..f7384a7526e 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **语法** ``` sql -s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] ,format, structure, [compression]) ``` **参数** diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index ef24eeaa6d7..59fc6c0c17f 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -35,7 +35,6 @@ #include #include #include -#include /** A tool for evaluating ClickHouse performance. 
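A hedged usage sketch of the extended `s3` signature shown earlier (the bucket path and credentials below are placeholders, not real values):

```sql
SELECT count()
FROM s3(
    'https://my-bucket.s3.amazonaws.com/data/*.csv',            -- placeholder path
    'AKIAEXAMPLEKEY', 'exampleSecret', 'exampleSessionToken',   -- keys plus the optional session token
    'CSV', 'name String, value UInt32');
```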
diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index d7d61bbcd3b..8e7f38b6a1e 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,9 @@ #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wmissing-declarations" +extern const char * auto_time_zones[]; + + namespace DB { namespace ErrorCodes @@ -133,9 +137,25 @@ int mainEntryClickHouseFormat(int argc, char ** argv) auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames(); auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames(); + auto all_known_settings = Settings().getAllRegisteredNames(); + auto all_known_merge_tree_settings = MergeTreeSettings().getAllRegisteredNames(); additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end()); additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end()); + additional_names.insert(all_known_settings.begin(), all_known_settings.end()); + additional_names.insert(all_known_merge_tree_settings.begin(), all_known_merge_tree_settings.end()); + + for (auto * it = auto_time_zones; *it; ++it) + { + String time_zone_name = *it; + + /// Example: Europe/Amsterdam + Strings split; + boost::split(split, time_zone_name, [](char c){ return c == '/'; }); + for (const auto & word : split) + if (!word.empty()) + additional_names.insert(word); + } KnownIdentifierFunc is_known_identifier = [&](std::string_view name) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index f82fdf1d97b..cfd4a9540ae 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -771,6 +771,7 @@ void LocalServer::processConfig() global_context->setQueryKindInitial(); global_context->setQueryKind(query_kind); + global_context->setQueryParameters(query_parameters); } @@ -817,6 +818,7 @@ void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & o std::cout << getHelpHeader() << "\n"; std::cout << options_description.main_description.value() << "\n"; std::cout << getHelpFooter() << "\n"; + std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n"; #endif } @@ -893,7 +895,31 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum for (int arg_num = 1; arg_num < argc; ++arg_num) { std::string_view arg = argv[arg_num]; - if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) + /// Parameter arg after underline. 
+ if (arg.starts_with("--param_")) + { + auto param_continuation = arg.substr(strlen("--param_")); + auto equal_pos = param_continuation.find_first_of('='); + + if (equal_pos == std::string::npos) + { + /// param_name value + ++arg_num; + if (arg_num >= argc) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter requires value"); + arg = argv[arg_num]; + query_parameters.emplace(String(param_continuation), String(arg)); + } + else + { + if (equal_pos == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter name cannot be empty"); + + /// param_name=value + query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1)); + } + } + else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) { /// Transform the abbreviated syntax '--multiquery ' into the full syntax '--multiquery -q ' ++arg_num; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 876827f249d..481510d681f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -2128,10 +2128,9 @@ void Server::createServers( { const Settings & settings = global_context->getSettingsRef(); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); Poco::Util::AbstractConfiguration::Keys protocols; config.keys("protocols", protocols); @@ -2385,10 +2384,9 @@ void Server::createInterserverServers( { const Settings & settings = global_context->getSettingsRef(); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) diff --git a/programs/server/binary.html b/programs/server/binary.html new file mode 100644 index 00000000000..988dd33a72a --- /dev/null +++ b/programs/server/binary.html @@ -0,0 +1,267 @@ + + + + + + ClickHouse Binary Viewer + + + + + +
+
+ + + + + diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 79ca983ae7f..04fdfb2d3ca 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -965,12 +965,10 @@ document.getElementById('mass-editor-textarea').addEventListener('input', e => { function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) { let legendEl; - let showTop = false; - const showLimit = 5; + let multiline; function init(u, opts) { legendEl = u.root.querySelector(".u-legend"); - legendEl.classList.remove("u-inline"); className && legendEl.classList.add(className); @@ -986,18 +984,19 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- ...style }); + const nodes = legendEl.querySelectorAll("th"); + for (let i = 0; i < nodes.length; i++) + nodes[i]._order = i; + if (opts.series.length == 2) { - const nodes = legendEl.querySelectorAll("th"); + multiline = false; for (let i = 0; i < nodes.length; i++) nodes[i].style.display = "none"; } else { + multiline = true; legendEl.querySelector("th").remove(); legendEl.querySelector("td").setAttribute('colspan', '2'); legendEl.querySelector("td").style.textAlign = 'center'; - } - - if (opts.series.length - 1 > showLimit) { - showTop = true; let footer = legendEl.insertRow().insertCell(); footer.setAttribute('colspan', '2'); footer.style.textAlign = 'center'; @@ -1024,18 +1023,20 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- left -= legendEl.clientWidth / 2; top -= legendEl.clientHeight / 2; legendEl.style.transform = "translate(" + left + "px, " + top + "px)"; - if (showTop) { + + if (multiline) { let nodes = nodeListToArray(legendEl.querySelectorAll("tr")); let header = nodes.shift(); let footer = nodes.pop(); - nodes.forEach(function (node) { node._sort_key = +node.querySelector("td").textContent; }); - nodes.sort((a, b) => +b._sort_key - +a._sort_key); + let showLimit = Math.floor(u.height / 30); + nodes.forEach(function (node) { node._sort_key = nodes.length > showLimit ? +node.querySelector("td").textContent.replace(/,/g,'') : node._order; }); + nodes.sort((a, b) => b._sort_key - a._sort_key); nodes.forEach(function (node) { node.parentNode.appendChild(node); }); for (let i = 0; i < nodes.length; i++) { nodes[i].style.display = i < showLimit ? null : "none"; - delete nodes[i]._sort_key; } footer.parentNode.appendChild(footer); + footer.style.display = nodes.length > showLimit ? 
null : "none"; } } diff --git a/rust/BLAKE3/CMakeLists.txt b/rust/BLAKE3/CMakeLists.txt deleted file mode 100644 index ceb0a647b66..00000000000 --- a/rust/BLAKE3/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -clickhouse_import_crate(MANIFEST_PATH Cargo.toml) -target_include_directories(_ch_rust_blake3 INTERFACE include) -add_library(ch_rust::blake3 ALIAS _ch_rust_blake3) diff --git a/rust/BLAKE3/Cargo.toml b/rust/BLAKE3/Cargo.toml deleted file mode 100644 index ed414fa54c1..00000000000 --- a/rust/BLAKE3/Cargo.toml +++ /dev/null @@ -1,20 +0,0 @@ -[package] -name = "_ch_rust_blake3" -version = "0.1.0" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -blake3 = "1.2.0" -libc = "0.2.132" - -[lib] -crate-type = ["staticlib"] - -[profile.release] -debug = true - -[profile.release-thinlto] -inherits = "release" -# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit -lto = true diff --git a/rust/BLAKE3/include/blake3.h b/rust/BLAKE3/include/blake3.h deleted file mode 100644 index 5dc7d5bd902..00000000000 --- a/rust/BLAKE3/include/blake3.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef BLAKE3_H -#define BLAKE3_H - -#include - - -extern "C" { - -char *blake3_apply_shim(const char *begin, uint32_t _size, uint8_t *out_char_data); - -void blake3_free_char_pointer(char *ptr_to_free); - -} // extern "C" - -#endif /* BLAKE3_H */ diff --git a/rust/BLAKE3/src/lib.rs b/rust/BLAKE3/src/lib.rs deleted file mode 100644 index 7a3be8a2ae7..00000000000 --- a/rust/BLAKE3/src/lib.rs +++ /dev/null @@ -1,31 +0,0 @@ -extern crate blake3; -extern crate libc; - -use std::ffi::{CString}; -use std::slice; -use std::os::raw::c_char; - -#[no_mangle] -pub unsafe extern "C" fn blake3_apply_shim( - begin: *const c_char, - size: u32, - out_char_data: *mut u8, -) -> *mut c_char { - if begin.is_null() { - let err_str = CString::new("input was a null pointer").unwrap(); - return err_str.into_raw(); - } - let input_res = slice::from_raw_parts(begin as *const u8, size as usize); - let mut hasher = blake3::Hasher::new(); - hasher.update(input_res); - let mut reader = hasher.finalize_xof(); - - reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN)); - std::ptr::null_mut() -} - -// Freeing memory according to docs: https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_raw -#[no_mangle] -pub unsafe extern "C" fn blake3_free_char_pointer(ptr_to_free: *mut c_char) { - std::mem::drop(CString::from_raw(ptr_to_free)); -} diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 5ea806baa3b..66694ee16f8 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -99,6 +99,5 @@ function(add_rust_subdirectory src) VERBATIM) endfunction() -add_rust_subdirectory (BLAKE3) add_rust_subdirectory (skim) add_rust_subdirectory (prql) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 04569cd3b3a..86bbec5579f 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2,14 +2,6 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "_ch_rust_blake3" -version = "0.1.0" -dependencies = [ - "blake3", - "libc", -] - [[package]] name = "_ch_rust_prql" version = "0.1.0" @@ -30,9 +22,9 @@ dependencies = [ [[package]] name = "addr2line" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" dependencies = [ "gimli", ] @@ -45,24 +37,31 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.6" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ - "getrandom", + "cfg-if", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -95,43 +94,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" [[package]] name = "anstyle-parse" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "anyhow" -version = "1.0.72" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" dependencies = [ "backtrace", ] @@ -146,12 +145,6 @@ dependencies = [ "yansi", ] -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - [[package]] name = 
"arrayvec" version = "0.7.4" @@ -166,9 +159,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.68" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" dependencies = [ "addr2line", "cc", @@ -193,44 +186,24 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" - -[[package]] -name = "blake3" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] [[package]] name = "cfg-if" @@ -240,24 +213,23 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.26" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", - "time 0.1.45", "wasm-bindgen", - "winapi", + "windows-targets 0.48.5", ] [[package]] name = "chumsky" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" dependencies = [ "hashbrown", "stacker", @@ -279,17 +251,11 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "constant_time_eq" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" - [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = 
"06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "crossbeam" @@ -307,9 +273,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "14c3242926edf34aec4ac3a77108ad4854bffaa2e4ddc1824124ce59231302d5" dependencies = [ "cfg-if", "crossbeam-utils", @@ -317,9 +283,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -328,22 +294,21 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset 0.9.0", - "scopeguard", ] [[package]] name = "crossbeam-queue" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +checksum = "b9bcf5bdbfdd6030fb4a1c497b5d5fc5921aa2f60d359a17e249c0e6df3de153" dependencies = [ "cfg-if", "crossbeam-utils", @@ -351,28 +316,18 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "c06d96137f14f244c37f989d9fff8f95e6c18b918e71f36638f8c49112e4c78f" dependencies = [ "cfg-if", ] -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -382,18 +337,18 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] [[package]] name = "cxx" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d" +checksum = "e9fc0c733f71e58dedf4f034cd2a266f80b94cc9ed512729e1798651b68c2cba" dependencies = [ "cc", "cxxbridge-flags", @@ -403,9 +358,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397" +checksum = 
"51bc81d2664db24cf1d35405f66e18a85cffd4d49ab930c71a5c6342a410f38c" dependencies = [ "cc", "codespan-reporting", @@ -413,24 +368,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "cxxbridge-flags" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0" +checksum = "8511afbe34ea242697784da5cb2c5d4a0afb224ca8b136bdf93bfe180cbe5884" [[package]] name = "cxxbridge-macro" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c" +checksum = "5c6888cd161769d65134846d4d4981d5a6654307cc46ec83fb917e530aea5f84" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] @@ -478,6 +433,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "deranged" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eb30d70a07a3b04884d2677f06bec33509dc67ca60d92949e5535352d3191dc" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_builder" version = "0.11.2" @@ -509,17 +473,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -556,28 +509,17 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "errno" -version = "0.3.2" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] @@ -595,40 +537,31 @@ dependencies = [ "thread_local", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "gimli" -version = "0.27.3" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash", + "allocator-api2", ] [[package]] @@ -639,22 +572,22 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -680,16 +613,7 @@ checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", "rustix", - "windows-sys", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", + "windows-sys 0.48.0", ] [[package]] @@ -702,16 +626,25 @@ dependencies = [ ] [[package]] -name = "itoa" -version = "1.0.9" +name = "itertools" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" dependencies = [ "wasm-bindgen", ] @@ -724,9 +657,20 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.147" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" + +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.1", + "libc", + "redox_syscall", +] [[package]] name = "link-cplusplus" @@ -739,21 +683,21 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" [[package]] name = "log" -version = "0.4.19" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memoffset" @@ -825,37 +769,27 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" -version = "0.31.1" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "pin-utils" @@ -864,19 +798,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "proc-macro2" -version = "1.0.66" +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] [[package]] name = "prql-ast" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71194e75f14dbe7debdf2b5eca0812c978021a1bd23d6fe1da98b58e407e035a" +checksum = "d9d91522f9f16d055409b9ffec55693a96e3424fe5d8e7c8331adcf6d7ee363a" dependencies = [ "enum-as-inner", "semver", @@ -886,9 +826,9 @@ dependencies = [ [[package]] name = "prql-compiler" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ff28e838b1be4227cc567a75c11caa3be25c5015f0e5fd21279c06e944ba44f" +checksum = "f4d56865532fcf1abaa31fbb6da6fd9e90edc441c5c78bfe2870ee75187c7a3c" dependencies = [ "anstream", "anyhow", @@ -912,9 +852,9 @@ dependencies = [ [[package]] name = "prql-parser" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3182e2ef0465a960eb02519b18768e39123d3c3a0037a2d2934055a3ef901870" +checksum = "9360352e413390cfd26345f49279622b87581a3b748340d3f42d4d616c2a1ec1" dependencies = [ "chumsky", "itertools 0.11.0", @@ -933,18 
+873,18 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.31" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ "either", "rayon-core", @@ -952,41 +892,39 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] [[package]] name = "redox_users" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" dependencies = [ "getrandom", - "redox_syscall", + "libredox", "thiserror", ] [[package]] name = "regex" -version = "1.9.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", @@ -996,9 +934,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.3" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", @@ -1007,9 +945,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rustc-demangle" @@ -1019,15 +957,15 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" -version = "0.38.6" +version = "0.38.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee020b1716f0a80e2ace9b03441a749e402e86712f15f16fe8a8f75afac732f" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1038,15 +976,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = 
"ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "scratch" @@ -1056,38 +988,38 @@ checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152" [[package]] name = "semver" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" dependencies = [ "serde", ] [[package]] name = "serde" -version = "1.0.174" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.174" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "serde_json" -version = "1.0.103" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -1112,7 +1044,7 @@ dependencies = [ "nix 0.25.1", "rayon", "regex", - "time 0.3.23", + "time", "timer", "tuikit", "unicode-width", @@ -1121,20 +1053,20 @@ dependencies = [ [[package]] name = "sqlformat" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" dependencies = [ - "itertools 0.10.5", + "itertools 0.12.0", "nom", "unicode_categories", ] [[package]] name = "sqlparser" -version = "0.36.1" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075" dependencies = [ "log", "serde", @@ -1170,23 +1102,17 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.1" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.27", + "syn 2.0.41", ] -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - [[package]] name = "syn" version = "1.0.109" @@ -1200,9 +1126,9 @@ dependencies = [ 
[[package]] name = "syn" -version = "2.0.27" +version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ "proc-macro2", "quote", @@ -1222,31 +1148,31 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" dependencies = [ "winapi-util", ] [[package]] name = "thiserror" -version = "1.0.44" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.44" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] @@ -1261,30 +1187,21 @@ dependencies = [ [[package]] name = "time" -version = "0.1.45" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ + "deranged", + "powerfmt", "serde", "time-core", ] [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "timer" @@ -1309,23 +1226,17 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "typenum" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" - [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unicode_categories" @@ -1366,12 +1277,6 @@ dependencies = [ "quote", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1380,9 +1285,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1390,24 +1295,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1415,22 +1320,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" [[package]] name = "winapi" @@ -1450,9 +1355,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -1464,12 +1369,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -1478,68 +1383,154 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] name = "windows-targets" -version = "0.48.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" [[package]] name = "windows_aarch64_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" [[package]] name = "windows_i686_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.48.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" [[package]] name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 2a2b582cea8..ac8b31a7290 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,7 +1,6 @@ # workspace is required to vendor crates for all packages. 
[workspace] members = [ - "BLAKE3", "skim", "prql", ] diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 5c101888140..b6ba562045d 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -51,10 +51,10 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls) { if (!aggregate_functions.contains(source_ignores_nulls)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found", source_ignores_nulls); if (!aggregate_functions.contains(target_respect_nulls)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found", target_respect_nulls); if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second) throw Exception( diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index a8254baac3a..94bb121893d 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -197,7 +197,7 @@ public: virtual void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const { if (isState()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} is marked as State but method insertMergeResultInto is not implemented"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} is marked as State but method insertMergeResultInto is not implemented", getName()); insertResultInto(place, to, arena); } diff --git a/src/Analyzer/Passes/CNF.cpp b/src/Analyzer/Passes/CNF.cpp index 91e973c7573..aa6ee539934 100644 --- a/src/Analyzer/Passes/CNF.cpp +++ b/src/Analyzer/Passes/CNF.cpp @@ -536,7 +536,8 @@ CNF CNF::toCNF(const QueryTreeNodePtr & node, ContextPtr context, size_t max_gro if (!cnf) throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Cannot convert expression '{}' to CNF, because it produces to many clauses." 
- "Size of boolean formula in CNF can be exponential of size of source formula."); + "Size of boolean formula in CNF can be exponential of size of source formula.", + node->formatConvertedASTForErrorMessage()); return *cnf; } diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 901867b8889..76b14c1a867 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -41,22 +42,6 @@ DataTypePtr getEnumType(const std::set & string_values) return getDataEnumType(string_values); } -QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_type, ContextPtr context) -{ - auto enum_literal = std::make_shared(result_type->getName(), std::make_shared()); - auto enum_literal_node = std::make_shared(std::move(enum_literal)); - - auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context)); - QueryTreeNodes arguments{ std::move(from), std::move(enum_literal_node) }; - - auto function_node = std::make_shared("_CAST"); - function_node->getArguments().getNodes() = std::move(arguments); - - function_node->resolveAsFunction(cast_function->build(function_node->getArgumentColumns())); - - return function_node; -} - /// if(arg1, arg2, arg3) will be transformed to if(arg1, _CAST(arg2, Enum...), _CAST(arg3, Enum...)) /// where Enum is generated based on the possible values stored in string_values void changeIfArguments( diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 6fa6c8b0e78..371c0a07511 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace DB { @@ -323,8 +325,21 @@ private: /// Because we reduce the number of operands here by eliminating the same equality checks, /// the only situation we can end up here is we had AND check where all the equality checks are the same so we know the type is UInt8. /// Otherwise, we will have > 1 operands and we don't have to do anything. - assert(!function_node.getResultType()->isNullable() && and_operands[0]->getResultType()->equals(*function_node.getResultType())); - node = std::move(and_operands[0]); + + auto operand_type = and_operands[0]->getResultType(); + auto function_type = function_node.getResultType(); + assert(!function_type->isNullable()); + if (!function_type->equals(*operand_type)) + { + /// Result of equality operator can be low cardinality, while AND always returns UInt8. 
+ /// In that case we replace `(lc = 1) AND (lc = 1)` with `(lc = 1) AS UInt8` + assert(function_type->equals(*removeLowCardinality(operand_type))); + node = createCastFunction(std::move(and_operands[0]), function_type, getContext()); + } + else + { + node = std::move(and_operands[0]); + } return; } @@ -389,11 +404,14 @@ private: continue; } + bool is_any_nullable = false; Tuple args; args.reserve(equals_functions.size()); /// first we create tuple from RHS of equals functions for (const auto & equals : equals_functions) { + is_any_nullable |= equals->getResultType()->isNullable(); + const auto * equals_function = equals->as(); assert(equals_function && equals_function->getFunctionName() == "equals"); @@ -421,8 +439,20 @@ private: in_function->getArguments().getNodes() = std::move(in_arguments); in_function->resolveAsFunction(in_function_resolver); - - or_operands.push_back(std::move(in_function)); + /** For `k :: UInt8`, expression `k = 1 OR k = NULL` with result type Nullable(UInt8) + * is replaced with `k IN (1, NULL)` with result type UInt8. + * Convert it back to Nullable(UInt8). + */ + if (is_any_nullable && !in_function->getResultType()->isNullable()) + { + auto nullable_result_type = std::make_shared(in_function->getResultType()); + auto in_function_nullable = createCastFunction(std::move(in_function), std::move(nullable_result_type), getContext()); + or_operands.push_back(std::move(in_function_nullable)); + } + else + { + or_operands.push_back(std::move(in_function)); + } } if (or_operands.size() == function_node.getArguments().getNodes().size()) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 1e63d5ca8e4..3290d918a8b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -119,6 +119,7 @@ namespace ErrorCodes extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; extern const int SYNTAX_ERROR; + extern const int UNEXPECTED_EXPRESSION; } /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first. 
@@ -1209,6 +1210,8 @@ private: static void expandGroupByAll(QueryNode & query_tree_node_typed); + static void expandOrderByAll(QueryNode & query_tree_node_typed); + static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context); @@ -2312,6 +2315,35 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed) recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes); } +void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed) +{ + auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as(); + if (!all_node) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node."); + + auto & projection_nodes = query_tree_node_typed.getProjection().getNodes(); + auto list_node = std::make_shared(); + list_node->getNodes().reserve(projection_nodes.size()); + + for (auto & node : projection_nodes) + { + if (auto * identifier_node = node->as(); identifier_node != nullptr) + if (Poco::toUpper(identifier_node->getIdentifier().getFullName()) == "ALL" || Poco::toUpper(identifier_node->getAlias()) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + + if (auto * function_node = node->as(); function_node != nullptr) + if (Poco::toUpper(function_node->getAlias()) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + + auto sort_node = std::make_shared(node, all_node->getSortDirection(), all_node->getNullsSortDirection()); + list_node->getNodes().push_back(sort_node); + } + + query_tree_node_typed.getOrderByNode() = list_node; +} + std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded( const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context) { @@ -6975,6 +7007,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING"); + if (settings.enable_order_by_all && query_node_typed.isOrderByAll()) + expandOrderByAll(query_node_typed); + /// Initialize aliases in query node scope QueryExpressionsAliasVisitor visitor(scope); diff --git a/src/Analyzer/QueryNode.h b/src/Analyzer/QueryNode.h index 82bc72b7411..d8b8741afb2 100644 --- a/src/Analyzer/QueryNode.h +++ b/src/Analyzer/QueryNode.h @@ -219,6 +219,18 @@ public: is_group_by_all = is_group_by_all_value; } + /// Returns true, if query node has ORDER BY ALL modifier, false otherwise + bool isOrderByAll() const + { + return is_order_by_all; + } + + /// Set query node ORDER BY ALL modifier value + void setIsOrderByAll(bool is_order_by_all_value) + { + is_order_by_all = is_order_by_all_value; + } + /// Returns true if query node WITH section is not empty, false otherwise bool hasWith() const { @@ -590,6 +602,7 @@ private: bool is_group_by_with_cube = false; bool is_group_by_with_grouping_sets = false; bool is_group_by_all = false; + bool is_order_by_all = false; std::string cte_name; NamesAndTypes projection_columns; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index c541888e5b9..4e2d0ad10a8 100644 --- 
a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -284,6 +284,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup); current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets); current_query_tree->setIsGroupByAll(select_query_typed.group_by_all); + current_query_tree->setIsOrderByAll(select_query_typed.order_by_all); current_query_tree->setOriginalAST(select_query); auto current_context = current_query_tree->getContext(); diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 918126e0ccc..f75022220e7 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -667,4 +667,20 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node) return out; } +QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context) +{ + auto enum_literal = std::make_shared(result_type->getName(), std::make_shared()); + auto enum_literal_node = std::make_shared(std::move(enum_literal)); + + auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context)); + QueryTreeNodes arguments{ std::move(node), std::move(enum_literal_node) }; + + auto function_node = std::make_shared("_CAST"); + function_node->getArguments().getNodes() = std::move(arguments); + + function_node->resolveAsFunction(cast_function->build(function_node->getArgumentColumns())); + + return function_node; +} + } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 060dc7d8bc0..e3316f5ad6b 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -99,4 +99,7 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context); /// Just collect all identifiers from query tree NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); +/// Wrap node into `_CAST` function +QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); + } diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 99e49026f2a..564a518689a 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -88,18 +88,19 @@ BackupEntriesCollector::BackupEntriesCollector( , read_settings(read_settings_) , context(context_) , on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000)) - , collect_metadata_timeout(context->getConfigRef().getUInt64("backups.collect_metadata_timeout", context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))) + , collect_metadata_timeout(context->getConfigRef().getUInt64( + "backups.collect_metadata_timeout", context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))) , attempts_to_collect_metadata_before_sleep(context->getConfigRef().getUInt("backups.attempts_to_collect_metadata_before_sleep", 2)) - , min_sleep_before_next_attempt_to_collect_metadata(context->getConfigRef().getUInt64("backups.min_sleep_before_next_attempt_to_collect_metadata", 100)) - , max_sleep_before_next_attempt_to_collect_metadata(context->getConfigRef().getUInt64("backups.max_sleep_before_next_attempt_to_collect_metadata", 5000)) + , min_sleep_before_next_attempt_to_collect_metadata( + context->getConfigRef().getUInt64("backups.min_sleep_before_next_attempt_to_collect_metadata", 100)) + , max_sleep_before_next_attempt_to_collect_metadata( + 
context->getConfigRef().getUInt64("backups.max_sleep_before_next_attempt_to_collect_metadata", 5000)) , compare_collected_metadata(context->getConfigRef().getBool("backups.compare_collected_metadata", true)) , log(&Poco::Logger::get("BackupEntriesCollector")) , global_zookeeper_retries_info( - "BackupEntriesCollector", - log, - context->getSettingsRef().backup_restore_keeper_max_retries, - context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms, - context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms) + context->getSettingsRef().backup_restore_keeper_max_retries, + context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms, + context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms) , threadpool(threadpool_) { } @@ -572,7 +573,7 @@ std::vector> BackupEntriesCollector::findTablesInD { /// Database or table could be replicated - so may use ZooKeeper. We need to retry. auto zookeeper_retries_info = global_zookeeper_retries_info; - ZooKeeperRetriesControl retries_ctl("getTablesForBackup", zookeeper_retries_info, nullptr); + ZooKeeperRetriesControl retries_ctl("getTablesForBackup", log, zookeeper_retries_info, nullptr); retries_ctl.retryLoop([&](){ db_tables = database->getTablesForBackup(filter_by_table_name, context); }); } catch (Exception & e) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 74195a93072..967abe09b3f 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -146,7 +146,7 @@ UInt64 BackupReaderS3::getFileSize(const String & file_name) { auto objects = listObjects(*client, s3_uri, file_name); if (objects.empty()) - throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist"); + throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist", file_name); return objects[0].GetSize(); } @@ -299,7 +299,7 @@ UInt64 BackupWriterS3::getFileSize(const String & file_name) { auto objects = listObjects(*client, s3_uri, file_name); if (objects.empty()) - throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist"); + throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist", file_name); return objects[0].GetSize(); } diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 61984d58889..9ac68bc2437 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -157,11 +157,16 @@ BackupImpl::~BackupImpl() void BackupImpl::open() { std::lock_guard lock{mutex}; - LOG_INFO(log, "{} backup: {}", ((open_mode == OpenMode::WRITE) ? "Writing" : "Reading"), backup_name_for_logging); - ProfileEvents::increment((open_mode == OpenMode::WRITE) ? 
ProfileEvents::BackupsOpenedForWrite : ProfileEvents::BackupsOpenedForRead); - if (open_mode == OpenMode::WRITE) + if (open_mode == OpenMode::READ) { + ProfileEvents::increment(ProfileEvents::BackupsOpenedForRead); + LOG_INFO(log, "Reading backup: {}", backup_name_for_logging); + } + else + { + ProfileEvents::increment(ProfileEvents::BackupsOpenedForWrite); + LOG_INFO(log, "Writing backup: {}", backup_name_for_logging); timestamp = std::time(nullptr); if (!uuid) uuid = UUIDHelpers::generateV4(); @@ -189,7 +194,7 @@ void BackupImpl::open() void BackupImpl::close() { std::lock_guard lock{mutex}; - closeArchive(); + closeArchive(/* finalize= */ false); if (!is_internal_backup && writer && !writing_finalized) removeAllFilesAfterFailure(); @@ -222,8 +227,11 @@ void BackupImpl::openArchive() } } -void BackupImpl::closeArchive() +void BackupImpl::closeArchive(bool finalize) { + if (finalize && archive_writer) + archive_writer->finalize(); + archive_reader.reset(); archive_writer.reset(); } @@ -978,7 +986,7 @@ void BackupImpl::finalizeWriting() { LOG_TRACE(log, "Finalizing backup {}", backup_name_for_logging); writeBackupMetadata(); - closeArchive(); + closeArchive(/* finalize= */ true); setCompressedSize(); removeLockFile(); LOG_TRACE(log, "Finalized backup {}", backup_name_for_logging); diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index 6070db79aa6..b369fe00171 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -89,7 +89,7 @@ private: void close(); void openArchive(); - void closeArchive(); + void closeArchive(bool finalize); /// Writes the file ".backup" containing backup's metadata. void writeBackupMetadata() TSA_REQUIRES(mutex); diff --git a/src/Backups/BackupInfo.cpp b/src/Backups/BackupInfo.cpp index f993d7ed984..2bff400d4fe 100644 --- a/src/Backups/BackupInfo.cpp +++ b/src/Backups/BackupInfo.cpp @@ -78,13 +78,16 @@ BackupInfo BackupInfo::fromAST(const IAST & ast) } } - res.args.reserve(list->children.size() - index); - for (; index < list->children.size(); ++index) + size_t args_size = list->children.size(); + res.args.reserve(args_size - index); + for (; index < args_size; ++index) { const auto & elem = list->children[index]; const auto * lit = elem->as(); if (!lit) + { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected literal, got {}", serializeAST(*elem)); + } res.args.push_back(lit->value); } } diff --git a/src/Backups/BackupOperationInfo.h b/src/Backups/BackupOperationInfo.h index 54f5e5e9965..e57b57d75f1 100644 --- a/src/Backups/BackupOperationInfo.h +++ b/src/Backups/BackupOperationInfo.h @@ -17,6 +17,9 @@ struct BackupOperationInfo /// Operation name, a string like "Disk('backups', 'my_backup')" String name; + /// Base Backup Operation name, a string like "Disk('backups', 'my_base_backup')" + String base_backup_name; + /// This operation is internal and should not be shown in system.backups bool internal = false; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index a1f619af0a4..8c4bb7e414c 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -394,9 +394,13 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context auto backup_info = BackupInfo::fromAST(*backup_query->backup_name); String backup_name_for_logging = backup_info.toStringForLogging(); + String base_backup_name; + if (backup_settings.base_backup_info) + base_backup_name = backup_settings.base_backup_info->toString(); + try { - addInfo(backup_id, backup_name_for_logging, backup_settings.internal, 
BackupStatus::CREATING_BACKUP); + addInfo(backup_id, backup_name_for_logging, base_backup_name, backup_settings.internal, BackupStatus::CREATING_BACKUP); /// Prepare context to use. ContextPtr context_in_use = context; @@ -606,7 +610,6 @@ void BackupsWorker::doBackup( void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination) { - LOG_TRACE(log, "{}", Stage::BUILDING_FILE_INFOS); backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, ""); backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS); backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), read_settings, getThreadPool(ThreadPoolId::BACKUP_MAKE_FILES_LIST))); @@ -745,8 +748,11 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt { auto backup_info = BackupInfo::fromAST(*restore_query->backup_name); String backup_name_for_logging = backup_info.toStringForLogging(); + String base_backup_name; + if (restore_settings.base_backup_info) + base_backup_name = restore_settings.base_backup_info->toString(); - addInfo(restore_id, backup_name_for_logging, restore_settings.internal, BackupStatus::RESTORING); + addInfo(restore_id, backup_name_for_logging, base_backup_name, restore_settings.internal, BackupStatus::RESTORING); /// Prepare context to use. ContextMutablePtr context_in_use = context; @@ -1005,11 +1011,12 @@ void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr } -void BackupsWorker::addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status) +void BackupsWorker::addInfo(const OperationID & id, const String & name, const String & base_backup_name, bool internal, BackupStatus status) { BackupOperationInfo info; info.id = id; info.name = name; + info.base_backup_name = base_backup_name; info.internal = internal; info.status = status; info.start_time = std::chrono::system_clock::now(); diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index b0a76eb0fa8..e2bd076314f 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -83,7 +83,7 @@ private: /// Run data restoring tasks which insert data to tables. void restoreTablesData(const BackupOperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool); - void addInfo(const BackupOperationID & id, const String & name, bool internal, BackupStatus status); + void addInfo(const BackupOperationID & id, const String & name, const String & base_backup_name, bool internal, BackupStatus status); void setStatus(const BackupOperationID & id, BackupStatus status, bool throw_if_error = true); void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); } void setNumFilesAndSize(const BackupOperationID & id, size_t num_files, UInt64 total_size, size_t num_entries, diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 026671edd6a..4e580e493a7 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -43,14 +43,6 @@ namespace Stage = BackupCoordinationStage; namespace { - /// Uppercases the first character of a passed string. 
- String toUpperFirst(const String & str) - { - String res = str; - res[0] = std::toupper(res[0]); - return res; - } - /// Outputs "table " or "temporary table " String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper) { @@ -145,7 +137,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode) void RestorerFromBackup::setStage(const String & new_stage, const String & message) { - LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage))); + LOG_TRACE(log, "Setting stage: {}", new_stage); current_stage = new_stage; if (restore_coordination) diff --git a/src/Backups/WithRetries.cpp b/src/Backups/WithRetries.cpp index 40ae8d06462..55809dc6958 100644 --- a/src/Backups/WithRetries.cpp +++ b/src/Backups/WithRetries.cpp @@ -20,22 +20,19 @@ WithRetries::KeeperSettings WithRetries::KeeperSettings::fromContext(ContextPtr }; } -WithRetries::WithRetries(Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, RenewerCallback callback_) +WithRetries::WithRetries( + Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, RenewerCallback callback_) : log(log_) , get_zookeeper(get_zookeeper_) , settings(settings_) , callback(callback_) , global_zookeeper_retries_info( - log->name(), - log, - settings.keeper_max_retries, - settings.keeper_retry_initial_backoff_ms, - settings.keeper_retry_max_backoff_ms) + settings.keeper_max_retries, settings.keeper_retry_initial_backoff_ms, settings.keeper_retry_max_backoff_ms) {} WithRetries::RetriesControlHolder::RetriesControlHolder(const WithRetries * parent, const String & name) : info(parent->global_zookeeper_retries_info) - , retries_ctl(name, info, nullptr) + , retries_ctl(name, parent->log, info, nullptr) , faulty_zookeeper(parent->getFaultyZooKeeper()) {} diff --git a/src/BridgeHelper/LibraryBridgeHelper.cpp b/src/BridgeHelper/LibraryBridgeHelper.cpp index 60588951c32..e83707595b9 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.cpp +++ b/src/BridgeHelper/LibraryBridgeHelper.cpp @@ -12,7 +12,7 @@ LibraryBridgeHelper::LibraryBridgeHelper(ContextPtr context_) , http_timeout(context_->getGlobalContext()->getSettingsRef().http_receive_timeout.value) , bridge_host(config.getString("library_bridge.host", DEFAULT_HOST)) , bridge_port(config.getUInt("library_bridge.port", DEFAULT_PORT)) - , http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), {context_->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings().keep_alive_timeout)) { } diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index 44104f26f63..060de74b5b1 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -162,7 +162,7 @@ private: ConnectionTimeouts getHTTPTimeouts() { - return ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), {getContext()->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}); + return ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), getContext()->getServerSettings().keep_alive_timeout); } protected: diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 849308155b0..dbb115f44ef 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -201,7 +201,7 @@ void LocalConnection::sendQuery( catch (...) 
{ state->io.onException(); - state->exception = std::make_unique(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"); + state->exception = std::make_unique(Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception")); } } @@ -311,7 +311,7 @@ bool LocalConnection::poll(size_t) catch (...) { state->io.onException(); - state->exception = std::make_unique(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"); + state->exception = std::make_unique(Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception")); } } diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index c7ebcac1264..eb98c3a5740 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -48,7 +48,7 @@ Suggest::Suggest() "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND", - "IGNORE NULLS", "RESPECT NULLS", "OVER"}); + "IGNORE NULLS", "RESPECT NULLS", "OVER", "PASTE"}); } static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion) diff --git a/src/Common/ArrayCache.h b/src/Common/ArrayCache.h index 47b91ff4eef..b6dde039227 100644 --- a/src/Common/ArrayCache.h +++ b/src/Common/ArrayCache.h @@ -179,13 +179,22 @@ private: { ptr = mmap(address_hint, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == ptr) - throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}", ReadableSize(size)); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}", ReadableSize(size)); } ~Chunk() { if (ptr && 0 != munmap(ptr, size)) - throw ErrnoException(DB::ErrorCodes::CANNOT_MUNMAP, "Allocator: Cannot munmap {}", ReadableSize(size)); + { + try + { + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_MUNMAP, "Allocator: Cannot munmap {}", ReadableSize(size)); + } + catch (DB::ErrnoException &) + { + DB::tryLogCurrentException(__PRETTY_FUNCTION__); + } + } } Chunk(Chunk && other) noexcept : ptr(other.ptr), size(other.size) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 9df6d22df04..e8deb459b24 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "config.h" @@ -655,6 +656,19 @@ void AsynchronousMetrics::update(TimePoint update_time) total_memory_tracker.setRSS(rss, free_memory_in_allocator_arenas); } } + + { + struct rusage rusage{}; + if (!getrusage(RUSAGE_SELF, &rusage)) + { + new_values["MemoryResidentMax"] = { rusage.ru_maxrss * 1024 /* KiB -> bytes */, + "Maximum amount of physical memory used by the server process, in bytes." }; + } + else + { + LOG_ERROR(log, "Cannot obtain resource usage: {}", errnoToString(errno)); + } + } #endif #if defined(OS_LINUX) diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 9b2507794bb..aabc848b230 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -93,15 +93,6 @@ public: return Exception(msg, code, remote_); } - /// Message must be a compile-time constant - template - requires std::is_convertible_v - Exception(int code, T && message) : Exception(message, code) - { - capture_thread_frame_pointers = thread_frame_pointers; - message_format_string = tryGetStaticFormatString(message); - } - /// These creators are for messages that were received by network or generated by a third-party library in runtime. 
/// Please use a constructor for all other cases. static Exception createRuntime(int code, const String & message) { return Exception(message, code); } diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index 60834afab35..c4cb4266418 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -18,16 +18,37 @@ template static inline String formatQuoted(T x) { WriteBufferFromOwnString wb; - writeQuoted(x, wb); - return wb.str(); -} -template -static inline void writeQuoted(const DecimalField & x, WriteBuffer & buf) -{ - writeChar('\'', buf); - writeText(x.getValue(), x.getScale(), buf, {}); - writeChar('\'', buf); + if constexpr (is_decimal_field) + { + writeChar('\'', wb); + writeText(x.getValue(), x.getScale(), wb, {}); + writeChar('\'', wb); + } + else if constexpr (is_big_int_v) + { + writeChar('\'', wb); + writeText(x, wb); + writeChar('\'', wb); + } + else + { + /// While `writeQuoted` sounds like it will always write the value in quotes, + /// in fact it means: write according to the rules of the quoted format, like VALUES, + /// where strings, dates, date-times, UUID are in quotes, and numbers are not. + + /// That's why we take extra care to put Decimal and big integers inside quotes + /// when formatting literals in SQL language, + /// because it is different from the quoted formats like VALUES. + + /// In fact, there are no Decimal and big integer literals in SQL, + /// but they can appear if we format the query from a modified AST. + + /// We can fix this idiosyncrasy later. + + writeQuoted(x, wb); + } + return wb.str(); } /** In contrast to writeFloatText (and writeQuoted), diff --git a/src/Common/Macros.cpp b/src/Common/Macros.cpp index 891aa53c061..0035e7abfe8 100644 --- a/src/Common/Macros.cpp +++ b/src/Common/Macros.cpp @@ -120,7 +120,7 @@ String Macros::expand(const String & s, auto uuid = ServerUUID::get(); if (UUIDHelpers::Nil == uuid) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Macro {server_uuid} expanded to zero, which means the UUID is not initialized (most likely it's not a server application)"); + "Macro {{server_uuid}} expanded to zero, which means the UUID is not initialized (most likely it's not a server application)"); res += toString(uuid); info.expanded_other = true; } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index f342a19b2aa..4bdf6288a1c 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -250,9 +250,9 @@ Number of times data after merge is not byte-identical to the data on another re 7. Manual modification of source data after server startup. 8. Manual modification of checksums stored in ZooKeeper. 9. Part format related settings like 'enable_mixed_granularity_parts' are different on different replicas. -The server successfully detected this situation and will download merged part from replica to force byte-identical result. +The server successfully detected this situation and will download merged part from the replica to force the byte-identical result. )") \ - M(DataAfterMutationDiffersFromReplica, "Number of times data after mutation is not byte-identical to the data on another replicas. In addition to the reasons described in 'DataAfterMergeDiffersFromReplica', it is also possible due to non-deterministic mutation.") \ + M(DataAfterMutationDiffersFromReplica, "Number of times data after mutation is not byte-identical to the data on other replicas. 
In addition to the reasons described in 'DataAfterMergeDiffersFromReplica', it is also possible due to non-deterministic mutation.") \ M(PolygonsAddedToPool, "A polygon has been added to the cache (pool) for the 'pointInPolygon' function.") \ M(PolygonsInPoolAllocatedBytes, "The number of bytes for polygons added to the cache (pool) for the 'pointInPolygon' function.") \ \ @@ -272,12 +272,12 @@ The server successfully detected this situation and will download merged part fr M(PartsLockWaitMicroseconds, "Total time spent waiting for data parts lock in MergeTree tables") \ \ M(RealTimeMicroseconds, "Total (wall clock) time spent in processing (queries and other tasks) threads (note that this is a sum).") \ - M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user mode. This include time CPU pipeline was stalled due to main memory access, cache misses, branch mispredictions, hyper-threading, etc.") \ + M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user mode. This includes time CPU pipeline was stalled due to main memory access, cache misses, branch mispredictions, hyper-threading, etc.") \ M(SystemTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in OS kernel mode. This is time spent in syscalls, excluding waiting time during blocking syscalls.") \ M(MemoryOvercommitWaitTimeMicroseconds, "Total time spent in waiting for memory to be freed in OvercommitTracker.") \ M(MemoryAllocatorPurge, "Total number of times memory allocator purge was requested") \ M(MemoryAllocatorPurgeTimeMicroseconds, "Total number of times memory allocator purge was requested") \ - M(SoftPageFaults, "The number of soft page faults in query execution threads. Soft page fault usually means a miss in the memory allocator cache which required a new memory mapping from the OS and subsequent allocation of a page of physical memory.") \ + M(SoftPageFaults, "The number of soft page faults in query execution threads. Soft page fault usually means a miss in the memory allocator cache, which requires a new memory mapping from the OS and subsequent allocation of a page of physical memory.") \ M(HardPageFaults, "The number of hard page faults in query execution threads. High values indicate either that you forgot to turn off swap on your server, or eviction of memory pages of the ClickHouse binary during very high memory pressure, or successful usage of the 'mmap' read method for the tables data.") \ \ M(OSIOWaitMicroseconds, "Total time a thread spent waiting for a result of IO operation, from the OS point of view. This is real IO that doesn't include page cache.") \ @@ -290,8 +290,8 @@ The server successfully detected this situation and will download merged part fr \ M(PerfCpuCycles, "Total cycles. Be wary of what happens during CPU frequency scaling.") \ M(PerfInstructions, "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.") \ - M(PerfCacheReferences, "Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.") \ - M(PerfCacheMisses, "Cache misses. 
Usually this indicates Last Level Cache misses; this is intended to be used in con‐junction with the PERFCOUNTHWCACHEREFERENCES event to calculate cache miss rates.") \ + M(PerfCacheReferences, "Cache accesses. Usually, this indicates Last Level Cache accesses, but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.") \ + M(PerfCacheMisses, "Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction with the PERFCOUNTHWCACHEREFERENCES event to calculate cache miss rates.") \ M(PerfBranchInstructions, "Retired branch instructions. Prior to Linux 2.6.35, this used the wrong event on AMD processors.") \ M(PerfBranchMisses, "Mispredicted branch instructions.") \ M(PerfBusCycles, "Bus cycles, which can be different from total cycles.") \ @@ -457,7 +457,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentWaitReadBufferMicroseconds, "Metric per file segment. Time spend waiting for internal read buffer (includes cache waiting)") \ M(FileSegmentReadMicroseconds, "Metric per file segment. Time spend reading from file") \ M(FileSegmentCacheWriteMicroseconds, "Metric per file segment. Time spend writing data to cache") \ - M(FileSegmentPredownloadMicroseconds, "Metric per file segment. Time spent predownloading data to cache (predownloading - finishing file segment download (after someone who failed to do that) up to the point current thread was requested to do)") \ + M(FileSegmentPredownloadMicroseconds, "Metric per file segment. Time spent pre-downloading data to cache (pre-downloading - finishing file segment download (after someone who failed to do that) up to the point current thread was requested to do)") \ M(FileSegmentUsedBytes, "Metric per file segment. 
How many bytes were actually used from current file segment") \ \ M(ReadBufferSeekCancelConnection, "Number of seeks which lead to new connection (s3, http)") \ @@ -466,12 +466,12 @@ The server successfully detected this situation and will download merged part fr M(SleepFunctionMicroseconds, "Time set to sleep in a sleep function (sleep, sleepEachRow).") \ M(SleepFunctionElapsedMicroseconds, "Time spent sleeping in a sleep function (sleep, sleepEachRow).") \ \ - M(ThreadPoolReaderPageCacheHit, "Number of times the read inside ThreadPoolReader was done from page cache.") \ - M(ThreadPoolReaderPageCacheHitBytes, "Number of bytes read inside ThreadPoolReader when it was done from page cache.") \ + M(ThreadPoolReaderPageCacheHit, "Number of times the read inside ThreadPoolReader was done from the page cache.") \ + M(ThreadPoolReaderPageCacheHitBytes, "Number of bytes read inside ThreadPoolReader when it was done from the page cache.") \ M(ThreadPoolReaderPageCacheHitElapsedMicroseconds, "Time spent reading data from page cache in ThreadPoolReader.") \ M(ThreadPoolReaderPageCacheMiss, "Number of times the read inside ThreadPoolReader was not done from page cache and was hand off to thread pool.") \ M(ThreadPoolReaderPageCacheMissBytes, "Number of bytes read inside ThreadPoolReader when read was not done from page cache and was hand off to thread pool.") \ - M(ThreadPoolReaderPageCacheMissElapsedMicroseconds, "Time spent reading data inside the asynchronous job in ThreadPoolReader - when read was not done from page cache.") \ + M(ThreadPoolReaderPageCacheMissElapsedMicroseconds, "Time spent reading data inside the asynchronous job in ThreadPoolReader - when read was not done from the page cache.") \ \ M(AsynchronousReadWaitMicroseconds, "Time spent in waiting for asynchronous reads in asynchronous local read.") \ M(SynchronousReadWaitMicroseconds, "Time spent in waiting for synchronous reads in asynchronous local read.") \ @@ -512,7 +512,7 @@ The server successfully detected this situation and will download merged part fr M(SchemaInferenceCacheSchemaHits, "Number of times the schema is found in schema cache during schema inference") \ M(SchemaInferenceCacheNumRowsHits, "Number of times the number of rows is found in schema cache during count from files") \ M(SchemaInferenceCacheMisses, "Number of times the requested source is not in schema cache") \ - M(SchemaInferenceCacheSchemaMisses, "Number of times the requested source is in cache but the schema is not in cache while schema inference") \ + M(SchemaInferenceCacheSchemaMisses, "Number of times the requested source is in cache but the schema is not in cache during schema inference") \ M(SchemaInferenceCacheNumRowsMisses, "Number of times the requested source is in cache but the number of rows is not in cache while count from files") \ M(SchemaInferenceCacheEvictions, "Number of times a schema from cache was evicted due to overflow") \ M(SchemaInferenceCacheInvalidations, "Number of times a schema in cache became invalid due to changes in data") \ @@ -570,7 +570,7 @@ The server successfully detected this situation and will download merged part fr \ M(ReadTaskRequestsSent, "The number of callbacks requested from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the remote server side.") \ M(MergeTreeReadTaskRequestsSent, "The number of callbacks requested from the remote server back to the initiator server to choose the read task (for MergeTree tables). 
Measured on the remote server side.") \ - M(MergeTreeAllRangesAnnouncementsSent, "The number of announcement sent from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \ + M(MergeTreeAllRangesAnnouncementsSent, "The number of announcements sent from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \ M(ReadTaskRequestsSentElapsedMicroseconds, "Time spent in callbacks requested from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the remote server side.") \ M(MergeTreeReadTaskRequestsSentElapsedMicroseconds, "Time spent in callbacks requested from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.") \ M(MergeTreeAllRangesAnnouncementsSentElapsedMicroseconds, "Time spent in sending the announcement from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \ @@ -586,6 +586,8 @@ The server successfully detected this situation and will download merged part fr M(LogWarning, "Number of log messages with level Warning") \ M(LogError, "Number of log messages with level Error") \ M(LogFatal, "Number of log messages with level Fatal") \ + \ + M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas") \ #ifdef APPLY_FOR_EXTERNAL_EVENTS #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index a0b3d411e38..d82b582fee6 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -188,6 +188,9 @@ typename SystemLogQueue::Index SystemLogQueue::pop(std:: bool & should_prepare_tables_anyway, bool & exit_this_thread) { + /// Call dtors and deallocate strings without holding the global lock + output.resize(0); + std::unique_lock lock(mutex); flush_event.wait_for(lock, std::chrono::milliseconds(settings.flush_interval_milliseconds), @@ -200,7 +203,6 @@ typename SystemLogQueue::Index SystemLogQueue::pop(std:: queue_front_index += queue.size(); // Swap with existing array from previous flush, to save memory // allocations. 
- output.resize(0); queue.swap(output); should_prepare_tables_anyway = is_force_prepare_tables; diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 80dee2b5c81..76cdfe9f230 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -471,7 +471,7 @@ private: /// Message must be a compile-time constant template requires std::is_convertible_v - Exception(T && message, const Error code_) : DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, std::forward(message)), code(code_) + Exception(T && message, const Error code_) : DB::Exception(std::forward(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_) { incrementErrorMetrics(code); } diff --git a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.cpp b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.cpp index 4de11cdbc7e..72923ca0487 100644 --- a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.cpp +++ b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.cpp @@ -290,6 +290,11 @@ bool ZooKeeperWithFaultInjection::exists(const std::string & path, Coordination: return executeWithFaultSync(__func__, path, [&]() { return keeper->exists(path, stat, watch); }); } +bool ZooKeeperWithFaultInjection::anyExists(const std::vector & paths) +{ + return executeWithFaultSync(__func__, !paths.empty() ? paths.front() : "", [&]() { return keeper->anyExists(paths); }); +} + zkutil::ZooKeeper::MultiExistsResponse ZooKeeperWithFaultInjection::exists(const std::vector & paths) { return executeWithFaultSync(__func__, !paths.empty() ? paths.front() : "", [&]() { return keeper->exists(paths); }); diff --git a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h index 9354c53df26..57e1f0f3b87 100644 --- a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h +++ b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h @@ -59,6 +59,7 @@ private: class ZooKeeperWithFaultInjection { zkutil::ZooKeeper::Ptr keeper; + std::unique_ptr fault_policy; std::string name; Poco::Logger * logger = nullptr; @@ -203,6 +204,8 @@ public: zkutil::ZooKeeper::MultiExistsResponse exists(const std::vector & paths); + bool anyExists(const std::vector & paths); + std::string create(const std::string & path, const std::string & data, int32_t mode); Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); diff --git a/src/Common/config.h.in b/src/Common/config.h.in index f84e28942c5..5b3388a3b7d 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -28,6 +28,7 @@ #cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_FASTOPS #cmakedefine01 USE_SQIDS +#cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN #cmakedefine01 USE_LIBURING diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index bc5e3a723f2..638ee11858a 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -859,6 +859,10 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ initial_batch_committed = true; return nuraft::cb_func::ReturnCode::Ok; } + case nuraft::cb_func::PreAppendLogLeader: + { + return nuraft::cb_func::ReturnCode::ReturnNull; + } case nuraft::cb_func::PreAppendLogFollower: { const auto & entry = *static_cast(param->ctx); diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index d76e310f2a3..a245ccc16df 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp 
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -76,7 +76,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo LOG_INFO(log, "S3 configuration was updated"); - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); auto headers = auth_settings.headers; static constexpr size_t s3_max_redirects = 10; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 0d1d07ec7c5..41e6f5b5e2b 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -914,7 +914,7 @@ void KeeperStorage::unregisterEphemeralPath(int64_t session_id, const std::strin { auto ephemerals_it = ephemerals.find(session_id); if (ephemerals_it == ephemerals.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Session {} is missing ephemeral path"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session {} is missing ephemeral path", session_id); ephemerals_it->second.erase(path); if (ephemerals_it->second.empty()) diff --git a/src/Core/Joins.cpp b/src/Core/Joins.cpp index 9c8ece82224..77568223d71 100644 --- a/src/Core/Joins.cpp +++ b/src/Core/Joins.cpp @@ -13,6 +13,7 @@ const char * toString(JoinKind kind) case JoinKind::Full: return "FULL"; case JoinKind::Cross: return "CROSS"; case JoinKind::Comma: return "COMMA"; + case JoinKind::Paste: return "PASTE"; } }; diff --git a/src/Core/Joins.h b/src/Core/Joins.h index 6884e8dfd9a..cc69f07263d 100644 --- a/src/Core/Joins.h +++ b/src/Core/Joins.h @@ -13,7 +13,8 @@ enum class JoinKind Right, Full, Cross, /// Direct product. Strictness and condition doesn't matter. - Comma /// Same as direct product. Intended to be converted to INNER JOIN with conditions from WHERE. + Comma, /// Same as direct product. Intended to be converted to INNER JOIN with conditions from WHERE. + Paste, /// Used to join parts without `ON` clause. }; const char * toString(JoinKind kind); @@ -27,6 +28,7 @@ inline constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::R inline constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; } inline constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; } inline constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; } +inline constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; } /// Allows more optimal JOIN for typical cases. enum class JoinStrictness diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6edd5297d35..85e3d33f80b 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -100,6 +100,10 @@ namespace DB M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ \ + M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ + M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. 
Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ + M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ + M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 5e5194eeb68..a38197b9eeb 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -107,9 +107,7 @@ std::vector Settings::getAllRegisteredNames() const { std::vector all_settings; for (const auto & setting_field : all()) - { all_settings.push_back(setting_field.getName()); - } return all_settings; } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fe9d703706e..b75004a3396 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -338,6 +338,7 @@ class IColumn; M(UInt64, http_max_field_value_size, 128 * 1024, "Maximum length of field value in HTTP header", 0) \ M(UInt64, http_max_chunk_size, 100_GiB, "Maximum value of a chunk size in HTTP chunked transfer encoding", 0) \ M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \ + M(Bool, http_make_head_request, true, "Allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size", 0) \ M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \ M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \ M(UInt64, use_index_for_in_with_subqueries_max_values, 0, "The maximum size of set in the right hand side of the IN operator to use table index for filtering. It allows to avoid performance degradation and higher memory usage due to preparation of additional data structures for large queries. Zero means no limit.", 0) \ @@ -527,8 +528,8 @@ class IColumn; M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. 
<= 0 means unlimited.", 0) \ M(Bool, check_query_single_value_result, true, "Return check query result as single 1/0 value", 0) \ M(Bool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries", 0) \ - M(UInt64, max_table_size_to_drop, 0, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, max_partition_size_to_drop, 0, "Only available in ClickHouse Cloud", 0) \ + M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ + M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ \ M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \ M(UInt64, postgresql_connection_pool_wait_timeout, 5000, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \ @@ -845,6 +846,7 @@ class IColumn; M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \ M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ + M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 53d5245770e..a967ecf67c6 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -323,7 +323,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context)) { - configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, false); + configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false); use_table_cache = named_collection->getOrDefault("use_table_cache", 0); } else @@ -386,7 +386,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context)) { - configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, false); + configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false); } else { diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 36dd858dcf7..952c0689a0d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1204,7 +1204,7 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node } void DatabaseReplicated::dropReplica( - DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica) + DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop) { assert(!database || database_zookeeper_path == database->zookeeper_path); @@ -1215,14 +1215,21 @@ void DatabaseReplicated::dropReplica( auto zookeeper = Context::getGlobalContextInstance()->getZooKeeper(); - String database_mark = zookeeper->get(database_zookeeper_path); + String 
database_mark; + bool db_path_exists = zookeeper->tryGet(database_zookeeper_path, database_mark); + if (!db_path_exists && !throw_if_noop) + return; if (database_mark != REPLICATED_DATABASE_MARK) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} does not look like a path of Replicated database", database_zookeeper_path); String database_replica_path = fs::path(database_zookeeper_path) / "replicas" / full_replica_name; if (!zookeeper->exists(database_replica_path)) + { + if (!throw_if_noop) + return; throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} does not exist (database path: {})", full_replica_name, database_zookeeper_path); + } if (zookeeper->exists(database_replica_path + "/active")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} is active, cannot drop it (database path: {})", diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 202f5cc5c14..8a3999e70e9 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -79,7 +79,7 @@ public: bool shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const override; - static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica); + static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop); std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index 11655f5f100..b92b4a971c1 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -255,7 +255,7 @@ DatabaseS3::Configuration DatabaseS3::parseArguments(ASTs engine_args, ContextPt arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context_); if (engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message.c_str()); + throw Exception::createRuntime(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message.c_str()); if (engine_args.empty()) return result; @@ -269,7 +269,7 @@ DatabaseS3::Configuration DatabaseS3::parseArguments(ASTs engine_args, ContextPt if (boost::iequals(second_arg, "NOSIGN")) result.no_sign_request = true; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, error_message.c_str()); + throw Exception::createRuntime(ErrorCodes::BAD_ARGUMENTS, error_message.c_str()); } // url, access_key_id, secret_access_key @@ -279,7 +279,7 @@ DatabaseS3::Configuration DatabaseS3::parseArguments(ASTs engine_args, ContextPt auto secret_key = checkAndGetLiteralArgument(engine_args[2], "secret_access_key"); if (key_id.empty() || secret_key.empty() || boost::iequals(key_id, "NOSIGN")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, error_message.c_str()); + throw Exception::createRuntime(ErrorCodes::BAD_ARGUMENTS, error_message.c_str()); result.access_key_id = key_id; result.secret_access_key = secret_key; diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 76cd36bf76a..0b6bdea60a3 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -37,7 +37,7 @@ DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute( auto expression = config.getString(config_prefix + ".expression", ""); if (name.empty() && !expression.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Element {}.name is empty"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Element 
{}.name is empty", config_prefix); const auto type_name = config.getString(config_prefix + ".type", default_type); return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)}; diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index c12f4fedf3f..689593a969e 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -38,7 +38,7 @@ HTTPDictionarySource::HTTPDictionarySource( , configuration(configuration_) , sample_block(sample_block_) , context(context_) - , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout)) { credentials.setUsername(credentials_.getUsername()); credentials.setPassword(credentials_.getPassword()); @@ -51,7 +51,7 @@ HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other) , configuration(other.configuration) , sample_block(other.sample_block) , context(Context::createCopy(other.context)) - , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout)) { credentials.setUsername(other.credentials.getUsername()); credentials.setPassword(other.credentials.getPassword()); diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 21016025d96..4c9ff8abe80 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -20,17 +20,19 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int DICTIONARY_IS_EMPTY; + extern const int LOGICAL_ERROR; extern const int UNSUPPORTED_METHOD; } -template -HashedArrayDictionary::HashedArrayDictionary( +template +HashedArrayDictionary::HashedArrayDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const HashedArrayDictionaryStorageConfiguration & configuration_, BlockPtr update_field_loaded_block_) : IDictionary(dict_id_) + , log(&Poco::Logger::get("HashedArrayDictionary")) , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , configuration(configuration_) @@ -42,8 +44,8 @@ HashedArrayDictionary::HashedArrayDictionary( calculateBytesAllocated(); } -template -ColumnPtr HashedArrayDictionary::getColumn( +template +ColumnPtr HashedArrayDictionary::getColumn( const std::string & attribute_name, const DataTypePtr & result_type, const Columns & key_columns, @@ -67,8 +69,8 @@ ColumnPtr HashedArrayDictionary::getColumn( return getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, extractor); } -template -Columns HashedArrayDictionary::getColumns( +template +Columns HashedArrayDictionary::getColumns( const Strings & attribute_names, const DataTypes & result_types, const Columns & key_columns, @@ -83,7 +85,7 @@ Columns HashedArrayDictionary::getColumns( const size_t keys_size = extractor.getKeysSize(); - PaddedPODArray key_index_to_element_index; + KeyIndexToElementIndex key_index_to_element_index; /** Optimization for multiple attributes. 
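 * In the sharded specialization each saved entry also carries the shard number (see KeyIndexToElementIndex in HashedArrayDictionary.h further down), so the second getItemsImpl pass can pick the matching per-shard attribute container directly.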
* For each key save element index in key_index_to_element_index array. @@ -92,7 +94,6 @@ Columns HashedArrayDictionary::getColumns( */ if (attribute_names.size() > 1) { - const auto & key_attribute_container = key_attribute.container; size_t keys_found = 0; key_index_to_element_index.resize(keys_size); @@ -100,15 +101,23 @@ Columns HashedArrayDictionary::getColumns( for (size_t key_index = 0; key_index < keys_size; ++key_index) { auto key = extractor.extractCurrentKey(); + auto shard = getShard(key); + const auto & key_attribute_container = key_attribute.containers[shard]; auto it = key_attribute_container.find(key); if (it == key_attribute_container.end()) { - key_index_to_element_index[key_index] = -1; + if constexpr (sharded) + key_index_to_element_index[key_index] = std::make_pair(-1, shard); + else + key_index_to_element_index[key_index] = -1; } else { - key_index_to_element_index[key_index] = it->getMapped(); + if constexpr (sharded) + key_index_to_element_index[key_index] = std::make_pair(it->getMapped(), shard); + else + key_index_to_element_index[key_index] = it->getMapped(); ++keys_found; } @@ -147,8 +156,8 @@ Columns HashedArrayDictionary::getColumns( return result_columns; } -template -ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const +template +ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const { if (dictionary_key_type == DictionaryKeyType::Complex) dict_struct.validateKeyTypes(key_types); @@ -166,8 +175,10 @@ ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Colum for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index) { auto requested_key = extractor.extractCurrentKey(); + auto shard = getShard(requested_key); + const auto & key_attribute_container = key_attribute.containers[shard]; - out[requested_key_index] = key_attribute.container.find(requested_key) != key_attribute.container.end(); + out[requested_key_index] = key_attribute_container.find(requested_key) != key_attribute_container.end(); keys_found += out[requested_key_index]; extractor.rollbackCurrentKey(); @@ -179,8 +190,8 @@ ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Colum return result; } -template -ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const +template +ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { @@ -197,16 +208,20 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key if (!dictionary_attribute.null_value.isNull()) null_value = dictionary_attribute.null_value.get(); - const auto & key_attribute_container = key_attribute.container; - const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + auto is_key_valid_func = [&, this](auto & key) + { + const auto & key_attribute_container = key_attribute.containers[getShard(key)]; + return key_attribute_container.find(key) != key_attribute_container.end(); + }; size_t keys_found = 0; - auto get_parent_func = [&](auto & hierarchy_key) + auto get_parent_func = [&, this](auto & hierarchy_key) { std::optional result; + auto shard = getShard(hierarchy_key); + const auto & key_attribute_container = key_attribute.containers[shard]; auto it 
= key_attribute_container.find(hierarchy_key); @@ -215,8 +230,9 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key size_t key_index = it->getMapped(); - if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index]) + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][key_index]) return result; + const auto & parent_keys_container = std::get>(hierarchical_attribute.containers)[shard]; UInt64 parent_key = parent_keys_container[key_index]; if (null_value && *null_value == parent_key) @@ -241,8 +257,8 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key } } -template -ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( +template +ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( ColumnPtr key_column [[maybe_unused]], ColumnPtr in_key_column [[maybe_unused]], const DataTypePtr &) const @@ -265,16 +281,20 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( if (!dictionary_attribute.null_value.isNull()) null_value = dictionary_attribute.null_value.get(); - const auto & key_attribute_container = key_attribute.container; - const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + auto is_key_valid_func = [&](auto & key) + { + const auto & key_attribute_container = key_attribute.containers[getShard(key)]; + return key_attribute_container.find(key) != key_attribute_container.end(); + }; size_t keys_found = 0; auto get_parent_func = [&](auto & hierarchy_key) { std::optional result; + auto shard = getShard(hierarchy_key); + const auto & key_attribute_container = key_attribute.containers[shard]; auto it = key_attribute_container.find(hierarchy_key); @@ -283,9 +303,10 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( size_t key_index = it->getMapped(); - if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index]) + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][key_index]) return result; + const auto & parent_keys_container = std::get>(hierarchical_attribute.containers)[shard]; UInt64 parent_key = parent_keys_container[key_index]; if (null_value && *null_value == parent_key) return result; @@ -309,8 +330,8 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( } } -template -DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary::getHierarchicalIndex() const +template +DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary::getHierarchicalIndex() const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { @@ -318,33 +339,35 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary & parent_keys_container = std::get>(hierarchical_attribute.container); - - const auto & key_attribute_container = key_attribute.container; - - HashMap index_to_key; - index_to_key.reserve(key_attribute.container.size()); - - for (auto & [key, value] : key_attribute_container) - index_to_key[value] = key; DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child; - parent_to_child.reserve(index_to_key.size()); - - size_t parent_keys_container_size = parent_keys_container.size(); - for (size_t i = 0; i < parent_keys_container_size; ++i) + for (size_t shard = 0; shard < configuration.shards; ++shard) { - if (unlikely(hierarchical_attribute.is_index_null) && 
(*hierarchical_attribute.is_index_null)[i]) - continue; + HashMap index_to_key; + index_to_key.reserve(element_counts[shard]); - const auto * it = index_to_key.find(i); - if (it == index_to_key.end()) - continue; + for (auto & [key, value] : key_attribute.containers[shard]) + index_to_key[value] = key; - auto child_key = it->getMapped(); - auto parent_key = parent_keys_container[i]; - parent_to_child[parent_key].emplace_back(child_key); + parent_to_child.reserve(parent_to_child.size() + index_to_key.size()); + + const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + const auto & parent_keys_container = std::get>(hierarchical_attribute.containers)[shard]; + + size_t parent_keys_container_size = parent_keys_container.size(); + for (size_t i = 0; i < parent_keys_container_size; ++i) + { + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][i]) + continue; + + const auto * it = index_to_key.find(i); + if (it == index_to_key.end()) + continue; + + auto child_key = it->getMapped(); + auto parent_key = parent_keys_container[i]; + parent_to_child[parent_key].emplace_back(child_key); + } } return std::make_shared(parent_to_child); @@ -355,8 +378,8 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary -ColumnPtr HashedArrayDictionary::getDescendants( +template +ColumnPtr HashedArrayDictionary::getDescendants( ColumnPtr key_column [[maybe_unused]], const DataTypePtr &, size_t level [[maybe_unused]], @@ -381,8 +404,8 @@ ColumnPtr HashedArrayDictionary::getDescendants( } } -template -void HashedArrayDictionary::createAttributes() +template +void HashedArrayDictionary::createAttributes() { const auto size = dict_struct.attributes.size(); attributes.reserve(size); @@ -395,17 +418,24 @@ void HashedArrayDictionary::createAttributes() using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional>() : std::optional>{}; - Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerType(), std::move(is_index_null)}; + auto is_index_null = dictionary_attribute.is_nullable ? 
std::make_optional>(configuration.shards) : std::nullopt; + Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerShardsType(configuration.shards), std::move(is_index_null)}; attributes.emplace_back(std::move(attribute)); }; callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); } + + key_attribute.containers.resize(configuration.shards); + element_counts.resize(configuration.shards); + + string_arenas.resize(configuration.shards); + for (auto & arena : string_arenas) + arena = std::make_unique(); } -template -void HashedArrayDictionary::updateData() +template +void HashedArrayDictionary::updateData() { if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { @@ -445,13 +475,17 @@ void HashedArrayDictionary::updateData() if (update_field_loaded_block) { resize(update_field_loaded_block->rows()); - blockToAttributes(*update_field_loaded_block.get()); + DictionaryKeysArenaHolder arena_holder; + blockToAttributes(*update_field_loaded_block.get(), arena_holder, /* shard = */ 0); } } -template -void HashedArrayDictionary::blockToAttributes(const Block & block [[maybe_unused]]) +template +void HashedArrayDictionary::blockToAttributes(const Block & block, DictionaryKeysArenaHolder & arena_holder, size_t shard) { + if (unlikely(shard >= configuration.shards)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Shard number {} is out of range: 0..{}", shard, configuration.shards - 1); + size_t skip_keys_size_offset = dict_struct.getKeysSize(); Columns key_columns; @@ -461,7 +495,6 @@ void HashedArrayDictionary::blockToAttributes(const Block & for (size_t i = 0; i < skip_keys_size_offset; ++i) key_columns.emplace_back(block.safeGetByPosition(i).column); - DictionaryKeysArenaHolder arena_holder; DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); const size_t keys_size = keys_extractor.getKeysSize(); @@ -471,18 +504,18 @@ void HashedArrayDictionary::blockToAttributes(const Block & { auto key = keys_extractor.extractCurrentKey(); - auto it = key_attribute.container.find(key); + auto it = key_attribute.containers[shard].find(key); - if (it != key_attribute.container.end()) + if (it != key_attribute.containers[shard].end()) { keys_extractor.rollbackCurrentKey(); continue; } if constexpr (std::is_same_v) - key = copyStringInArena(string_arena, key); + key = copyStringInArena(*string_arenas[shard], key); - key_attribute.container.insert({key, element_count}); + key_attribute.containers[shard].insert({key, element_counts[shard]}); for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) { @@ -498,16 +531,16 @@ void HashedArrayDictionary::blockToAttributes(const Block & using AttributeType = typename Type::AttributeType; using AttributeValueType = DictionaryValueType; - auto & attribute_container = std::get>(attribute.container); + auto & attribute_container = std::get>(attribute.containers)[shard]; attribute_container.emplace_back(); if (attribute_is_nullable) { - attribute.is_index_null->emplace_back(); + (*attribute.is_index_null)[shard].emplace_back(); if (column_value_to_insert.isNull()) { - (*attribute.is_index_null).back() = true; + (*attribute.is_index_null)[shard].back() = true; return; } } @@ -515,7 +548,7 @@ void HashedArrayDictionary::blockToAttributes(const Block & if constexpr (std::is_same_v) { String & value_to_insert = column_value_to_insert.get(); - StringRef string_in_arena_reference = copyStringInArena(string_arena, value_to_insert); + StringRef 
string_in_arena_reference = copyStringInArena(*string_arenas[shard], value_to_insert); attribute_container.back() = string_in_arena_reference; } else @@ -528,23 +561,29 @@ void HashedArrayDictionary::blockToAttributes(const Block & callOnDictionaryAttributeType(attribute.type, type_call); } - ++element_count; + ++element_counts[shard]; + ++total_element_count; keys_extractor.rollbackCurrentKey(); } } -template -void HashedArrayDictionary::resize(size_t total_rows) +template +void HashedArrayDictionary::resize(size_t total_rows) { if (unlikely(!total_rows)) return; - key_attribute.container.reserve(total_rows); + /// In multi shards configuration it is pointless. + if constexpr (sharded) + return; + + for (auto & container : key_attribute.containers) + container.reserve(total_rows); } -template +template template -ColumnPtr HashedArrayDictionary::getAttributeColumn( +ColumnPtr HashedArrayDictionary::getAttributeColumn( const Attribute & attribute, const DictionaryAttribute & dictionary_attribute, size_t keys_size, @@ -638,16 +677,14 @@ ColumnPtr HashedArrayDictionary::getAttributeColumn( return result; } -template +template template -void HashedArrayDictionary::getItemsImpl( +void HashedArrayDictionary::getItemsImpl( const Attribute & attribute, DictionaryKeysExtractor & keys_extractor, ValueSetter && set_value [[maybe_unused]], DefaultValueExtractor & default_value_extractor) const { - const auto & key_attribute_container = key_attribute.container; - const auto & attribute_container = std::get>(attribute.container); const size_t keys_size = keys_extractor.getKeysSize(); size_t keys_found = 0; @@ -655,6 +692,9 @@ void HashedArrayDictionary::getItemsImpl( for (size_t key_index = 0; key_index < keys_size; ++key_index) { auto key = keys_extractor.extractCurrentKey(); + auto shard = getShard(key); + const auto & key_attribute_container = key_attribute.containers[shard]; + const auto & attribute_container = std::get>(attribute.containers)[shard]; const auto it = key_attribute_container.find(key); @@ -665,7 +705,7 @@ void HashedArrayDictionary::getItemsImpl( const auto & element = attribute_container[element_index]; if constexpr (is_nullable) - set_value(key_index, element, (*attribute.is_index_null)[element_index]); + set_value(key_index, element, (*attribute.is_index_null)[shard][element_index]); else set_value(key_index, element, false); @@ -686,28 +726,39 @@ void HashedArrayDictionary::getItemsImpl( found_count.fetch_add(keys_found, std::memory_order_relaxed); } -template +template template -void HashedArrayDictionary::getItemsImpl( +void HashedArrayDictionary::getItemsImpl( const Attribute & attribute, - const PaddedPODArray & key_index_to_element_index, + const KeyIndexToElementIndex & key_index_to_element_index, ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const { - const auto & attribute_container = std::get>(attribute.container); const size_t keys_size = key_index_to_element_index.size(); + size_t shard = 0; for (size_t key_index = 0; key_index < keys_size; ++key_index) { - bool key_exists = key_index_to_element_index[key_index] != -1; - - if (key_exists) + ssize_t element_index; + if constexpr (sharded) { - size_t element_index = static_cast(key_index_to_element_index[key_index]); - const auto & element = attribute_container[element_index]; + element_index = key_index_to_element_index[key_index].first; + shard = key_index_to_element_index[key_index].second; + } + else + { + element_index = key_index_to_element_index[key_index]; + } + + if (element_index != 
-1) + { + const auto & attribute_container = std::get>(attribute.containers)[shard]; + + size_t found_element_index = static_cast(element_index); + const auto & element = attribute_container[found_element_index]; if constexpr (is_nullable) - set_value(key_index, element, (*attribute.is_index_null)[element_index]); + set_value(key_index, element, (*attribute.is_index_null)[shard][found_element_index]); else set_value(key_index, element, false); } @@ -721,13 +772,17 @@ void HashedArrayDictionary::getItemsImpl( } } -template -void HashedArrayDictionary::loadData() +template +void HashedArrayDictionary::loadData() { if (!source_ptr->hasUpdateField()) { - QueryPipeline pipeline; - pipeline = QueryPipeline(source_ptr->loadAll()); + + std::optional parallel_loader; + if constexpr (sharded) + parallel_loader.emplace(*this); + + QueryPipeline pipeline(source_ptr->loadAll()); DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); UInt64 pull_time_microseconds = 0; @@ -751,10 +806,22 @@ void HashedArrayDictionary::loadData() Stopwatch watch_process; resize(total_rows); - blockToAttributes(block); + + if (parallel_loader) + { + parallel_loader->addBlock(block); + } + else + { + DictionaryKeysArenaHolder arena_holder; + blockToAttributes(block, arena_holder, /* shard = */ 0); + } process_time_microseconds += watch_process.elapsedMicroseconds(); } + if (parallel_loader) + parallel_loader->finish(); + LOG_DEBUG(&Poco::Logger::get("HashedArrayDictionary"), "Finished {}reading {} blocks with {} rows from pipeline in {:.2f} sec and inserted into hashtable in {:.2f} sec", configuration.use_async_executor ? "asynchronous " : "", @@ -765,14 +832,14 @@ void HashedArrayDictionary::loadData() updateData(); } - if (configuration.require_nonempty && 0 == element_count) + if (configuration.require_nonempty && 0 == total_element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName()); } -template -void HashedArrayDictionary::buildHierarchyParentToChildIndexIfNeeded() +template +void HashedArrayDictionary::buildHierarchyParentToChildIndexIfNeeded() { if (!dict_struct.hierarchical_attribute_index) return; @@ -781,12 +848,13 @@ void HashedArrayDictionary::buildHierarchyParentToChildInde hierarchical_index = getHierarchicalIndex(); } -template -void HashedArrayDictionary::calculateBytesAllocated() +template +void HashedArrayDictionary::calculateBytesAllocated() { bytes_allocated += attributes.size() * sizeof(attributes.front()); - bytes_allocated += key_attribute.container.size(); + for (const auto & container : key_attribute.containers) + bytes_allocated += container.size(); for (auto & attribute : attributes) { @@ -796,26 +864,29 @@ void HashedArrayDictionary::calculateBytesAllocated() using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - const auto & container = std::get>(attribute.container); - bytes_allocated += sizeof(AttributeContainerType); - - if constexpr (std::is_same_v) + for (const auto & container : std::get>(attribute.containers)) { - /// It is not accurate calculations - bytes_allocated += sizeof(Array) * container.size(); - } - else - { - bytes_allocated += container.allocated_bytes(); - } + bytes_allocated += sizeof(AttributeContainerType); - bucket_count = container.capacity(); + if constexpr (std::is_same_v) + { + /// It is not accurate calculations + bytes_allocated += sizeof(Array) * container.size(); + } + else + { + bytes_allocated += 
container.allocated_bytes(); + } + + bucket_count = container.capacity(); + } }; callOnDictionaryAttributeType(attribute.type, type_call); if (attribute.is_index_null.has_value()) - bytes_allocated += (*attribute.is_index_null).size(); + for (const auto & container : attribute.is_index_null.value()) + bytes_allocated += container.size(); } if (update_field_loaded_block) @@ -826,18 +897,19 @@ void HashedArrayDictionary::calculateBytesAllocated() hierarchical_index_bytes_allocated = hierarchical_index->getSizeInBytes(); bytes_allocated += hierarchical_index_bytes_allocated; } - - bytes_allocated += string_arena.allocatedBytes(); + for (const auto & string_arena : string_arenas) + bytes_allocated += string_arena->allocatedBytes(); } -template -Pipe HashedArrayDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const +template +Pipe HashedArrayDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const { PaddedPODArray keys; - keys.reserve(key_attribute.container.size()); + keys.reserve(total_element_count); - for (auto & [key, _] : key_attribute.container) - keys.emplace_back(key); + for (const auto & container : key_attribute.containers) + for (auto & [key, _] : container) + keys.emplace_back(key); ColumnsWithTypeAndName key_columns; @@ -858,8 +930,10 @@ Pipe HashedArrayDictionary::read(const Names & column_names return result; } -template class HashedArrayDictionary; -template class HashedArrayDictionary; +template class HashedArrayDictionary; +template class HashedArrayDictionary; +template class HashedArrayDictionary; +template class HashedArrayDictionary; void registerDictionaryArrayHashed(DictionaryFactory & factory) { @@ -886,7 +960,14 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime}; + std::string dictionary_layout_name = dictionary_key_type == DictionaryKeyType::Simple ? "hashed_array" : "complex_key_hashed_array"; + std::string dictionary_layout_prefix = ".layout." 
+ dictionary_layout_name; + + Int64 shards = config.getInt(config_prefix + dictionary_layout_prefix + ".shards", 1); + if (shards <= 0 || 128 < shards) + throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [1, 128]", full_name); + + HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast(shards)}; ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); const auto & settings = context->getSettingsRef(); @@ -895,9 +976,17 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) configuration.use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor; if (dictionary_key_type == DictionaryKeyType::Simple) - return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + { + if (shards > 1) + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + } else - return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + { + if (shards > 1) + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + } }; factory.registerLayout("hashed_array", diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 3b9446e4e8f..606008ce921 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -13,6 +13,7 @@ #include #include #include +#include /** This dictionary stores all attributes in arrays. * Key is stored in hash table and value is index into attribute array. 
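The registration code above reads the new `shards` layout parameter, validates it against [1, 128], and instantiates the sharded template specialization only when shards > 1; keys are then distributed across shards by hashing. The following standalone sketch is illustrative only — the class name, the std::unordered_map containers, and std::hash as a stand-in for the intHashCRC32/StringRefHash used by the real dictionary are assumptions, not ClickHouse code — and shows how the bool template parameter keeps the single-shard path free of the hash-and-modulo step:

// Illustrative sketch of the sharding idea behind
// HashedArrayDictionary<dictionary_key_type, sharded>; names are stand-ins.
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <vector>

template <bool sharded>
class ShardedKeyIndex
{
public:
    explicit ShardedKeyIndex(size_t shards_) : shards(sharded ? shards_ : 1), containers(shards) {}

    /// With sharded == false the branch is resolved at compile time,
    /// so the single-shard build never pays for the hash and modulo.
    size_t getShard(uint64_t key) const
    {
        if constexpr (!sharded)
            return 0;
        return std::hash<uint64_t>{}(key) % shards;  /// stand-in for intHashCRC32(key) % shards
    }

    void insert(uint64_t key, size_t element_index)
    {
        containers[getShard(key)].emplace(key, element_index);
    }

    bool has(uint64_t key) const
    {
        const auto & container = containers[getShard(key)];
        return container.find(key) != container.end();
    }

private:
    size_t shards;
    std::vector<std::unordered_map<uint64_t, size_t>> containers;  /// one key -> element-index map per shard
};

In the real patch each shard additionally owns its own attribute containers, element count, and string arena, which is what the containers-per-shard changes in the surrounding hunks implement.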
@@ -25,12 +26,17 @@ struct HashedArrayDictionaryStorageConfiguration { const bool require_nonempty; const DictionaryLifetime lifetime; + size_t shards = 1; + size_t shard_load_queue_backlog = 10000; bool use_async_executor = false; }; -template +template class HashedArrayDictionary final : public IDictionary { + using DictionaryParallelLoaderType = HashedDictionaryImpl::HashedDictionaryParallelLoader>; + friend class HashedDictionaryImpl::HashedDictionaryParallelLoader>; + public: using KeyType = std::conditional_t; @@ -63,13 +69,13 @@ public: double getHitRate() const override { return 1.0; } - size_t getElementCount() const override { return element_count; } + size_t getElementCount() const override { return total_element_count; } - double getLoadFactor() const override { return static_cast(element_count) / bucket_count; } + double getLoadFactor() const override { return static_cast(total_element_count) / bucket_count; } std::shared_ptr clone() const override { - return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); + return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } DictionarySourcePtr getSource() const override { return source_ptr; } @@ -132,50 +138,54 @@ private: template using AttributeContainerType = std::conditional_t, std::vector, PaddedPODArray>; + template + using AttributeContainerShardsType = std::vector>; + struct Attribute final { AttributeUnderlyingType type; std::variant< - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType> - container; + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType> + containers; - std::optional> is_index_null; + /// One container per shard + using RowsMask = std::vector; + std::optional> is_index_null; }; struct KeyAttribute final { - - KeyContainerType container; - + /// One container per shard + std::vector containers; }; void createAttributes(); - void blockToAttributes(const Block & block); + void blockToAttributes(const Block & block, DictionaryKeysArenaHolder & arena_holder, size_t shard); void updateData(); @@ -185,6 +195,22 @@ private: void calculateBytesAllocated(); + UInt64 getShard(UInt64 key) const + { + if constexpr (!sharded) + return 0; 
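    /// In the non-sharded specialization the branch above is resolved at compile
    /// time, so existing single-shard dictionaries always use shard 0 and never
    /// reach the hash-and-modulo computation that follows.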
+ /// NOTE: function here should not match with the DefaultHash<> since + /// it used for the HashMap/sparse_hash_map. + return intHashCRC32(key) % configuration.shards; + } + + UInt64 getShard(StringRef key) const + { + if constexpr (!sharded) + return 0; + return StringRefHash()(key) % configuration.shards; + } + template ColumnPtr getAttributeColumn( const Attribute & attribute, @@ -200,10 +226,13 @@ private: ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; + + using KeyIndexToElementIndex = std::conditional_t>, PaddedPODArray>; + template void getItemsImpl( const Attribute & attribute, - const PaddedPODArray & key_index_to_element_index, + const KeyIndexToElementIndex & key_index_to_element_index, ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; @@ -215,6 +244,8 @@ private: void resize(size_t total_rows); + Poco::Logger * log; + const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const HashedArrayDictionaryStorageConfiguration configuration; @@ -225,17 +256,20 @@ private: size_t bytes_allocated = 0; size_t hierarchical_index_bytes_allocated = 0; - size_t element_count = 0; + std::atomic total_element_count = 0; + std::vector element_counts; size_t bucket_count = 0; mutable std::atomic query_count{0}; mutable std::atomic found_count{0}; BlockPtr update_field_loaded_block; - Arena string_arena; + std::vector> string_arenas; DictionaryHierarchicalParentToChildIndexPtr hierarchical_index; }; -extern template class HashedArrayDictionary; -extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; } diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 376637189dd..8009ffab80a 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -71,7 +71,8 @@ struct HashedDictionaryConfiguration template class HashedDictionary final : public IDictionary { - friend class HashedDictionaryParallelLoader; + using DictionaryParallelLoaderType = HashedDictionaryParallelLoader>; + friend class HashedDictionaryParallelLoader>; public: using KeyType = std::conditional_t; @@ -987,7 +988,7 @@ void HashedDictionary::getItemsImpl( auto key = keys_extractor.extractCurrentKey(); auto shard = getShard(key); - const auto & container = attribute_containers[getShard(key)]; + const auto & container = attribute_containers[shard]; const auto it = container.find(key); if (it != container.end()) @@ -1020,11 +1021,11 @@ void HashedDictionary::loadData() { if (!source_ptr->hasUpdateField()) { - std::optional> parallel_loader; + std::optional parallel_loader; if constexpr (sharded) parallel_loader.emplace(*this); - QueryPipeline pipeline = QueryPipeline(source_ptr->loadAll()); + QueryPipeline pipeline(source_ptr->loadAll()); DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h index b52158c7fcb..907a987555e 100644 --- a/src/Dictionaries/HashedDictionaryParallelLoader.h +++ b/src/Dictionaries/HashedDictionaryParallelLoader.h @@ -38,13 +38,12 @@ namespace DB::HashedDictionaryImpl { /// Implementation parallel dictionary load for SHARDS -template +template class HashedDictionaryParallelLoader : public boost::noncopyable { - using HashedDictionary = 
HashedDictionary; public: - explicit HashedDictionaryParallelLoader(HashedDictionary & dictionary_) + explicit HashedDictionaryParallelLoader(DictionaryType & dictionary_) : dictionary(dictionary_) , shards(dictionary.configuration.shards) , pool(CurrentMetrics::HashedDictionaryThreads, CurrentMetrics::HashedDictionaryThreadsActive, CurrentMetrics::HashedDictionaryThreadsScheduled, shards) @@ -118,7 +117,7 @@ public: } private: - HashedDictionary & dictionary; + DictionaryType & dictionary; const size_t shards; ThreadPool pool; std::vector>> shards_queues; diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 9be9fa1d0d4..c44bffe42e1 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -683,7 +683,7 @@ void RangeHashedDictionary::loadData() if (configuration.require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, - "{}: dictionary source is empty and 'require_nonempty' property is set."); + "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName()); } template diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 23dc7db508d..080f7db96be 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -76,7 +76,7 @@ XDBCDictionarySource::XDBCDictionarySource( , load_all_query(query_builder.composeLoadAllQuery()) , bridge_helper(bridge_) , bridge_url(bridge_helper->getMainURI()) - , timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), {context_->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings().keep_alive_timeout)) { auto url_params = bridge_helper->getURLParams(max_block_size); for (const auto & [name, value] : url_params) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index b1f55e96967..8e21b95ce68 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -373,7 +373,7 @@ void DiskLocal::removeDirectory(const String & path) { auto fs_path = fs::path(disk_path) / path; if (0 != rmdir(fs_path.c_str())) - ErrnoException::throwFromPath(ErrorCodes::CANNOT_RMDIR, fs_path, "Cannot rmdir {}", fs_path); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_RMDIR, fs_path, "Cannot remove directory {}", fs_path); } void DiskLocal::removeRecursive(const String & path) diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index 46d8c41ff78..90cd5285875 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -54,8 +54,7 @@ std::unique_ptr ReadBufferFromWebServer::initialize() } const auto & settings = context->getSettingsRef(); - const auto & config = context->getConfigRef(); - Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}; + const auto & server_settings = context->getServerSettings(); auto res = std::make_unique( uri, @@ -65,7 +64,7 @@ std::unique_ptr ReadBufferFromWebServer::initialize() settings.http_send_timeout, std::max(Poco::Timespan(settings.http_receive_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)), settings.tcp_keep_alive_timeout, - http_keep_alive_timeout), + server_settings.keep_alive_timeout), credentials, 0, buf_size, diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp 
b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 0103188b562..f3b0cb8b9a0 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -47,7 +47,7 @@ void WebObjectStorage::initialize(const String & uri_path, const std::unique_loc ReadWriteBufferFromHTTP::OutStreamCallback(), ConnectionTimeouts::getHTTPTimeouts( getContext()->getSettingsRef(), - {getContext()->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}), + getContext()->getServerSettings().keep_alive_timeout), credentials, /* max_redirects= */ 0, /* buffer_size_= */ DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 7ddfdb6b572..b8b9a9ecb0d 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -24,7 +24,6 @@ namespace ErrorCodes namespace JSONUtils { - template static std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows) @@ -72,7 +71,7 @@ namespace JSONUtils } else { - pos = find_first_symbols(pos, in.buffer().end()); + pos = find_first_symbols(pos, in.buffer().end()); if (pos > in.buffer().end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); @@ -89,19 +88,13 @@ namespace JSONUtils --balance; ++pos; } - else if (*pos == '\\') - { - ++pos; - if (loadAtPosition(in, memory, pos)) - ++pos; - } else if (*pos == '"') { quotes = true; ++pos; } - if (balance == 0) + if (!quotes && balance == 0) { ++number_of_rows; if ((number_of_rows >= min_rows) @@ -115,13 +108,14 @@ namespace JSONUtils return {loadAtPosition(in, memory, pos), number_of_rows}; } - std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) + std::pair fileSegmentationEngineJSONEachRow( + ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) { return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_bytes, 1, max_rows); } - std::pair - fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows) + std::pair fileSegmentationEngineJSONCompactEachRow( + ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows) { return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_bytes, min_rows, max_rows); } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 6c9f1a94022..7c7ccac8b01 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -14,7 +14,6 @@ void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); void registerFileSegmentationEngineRegexp(FormatFactory & factory); void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory); -void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory); #if USE_HIVE void registerFileSegmentationEngineHiveText(FormatFactory & factory); #endif @@ -161,7 +160,6 @@ void registerFormats() registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineJSONAsString(factory); registerFileSegmentationEngineJSONAsObject(factory); - registerFileSegmentationEngineJSONCompactEachRow(factory); #if USE_HIVE registerFileSegmentationEngineHiveText(factory); #endif @@ -294,4 +292,3 @@ void registerFormats() } } - diff --git 
a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 89676594581..a06e898b7c5 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids) list (APPEND PRIVATE_LIBS ch_contrib::sqids) endif() +if (TARGET ch_contrib::idna) + list (APPEND PRIVATE_LIBS ch_contrib::idna) +endif() + if (TARGET ch_contrib::h3) list (APPEND PRIVATE_LIBS ch_contrib::h3) endif() diff --git a/src/Functions/FunctionSqid.cpp b/src/Functions/FunctionSqid.cpp index 546263914c2..4517bba963e 100644 --- a/src/Functions/FunctionSqid.cpp +++ b/src/Functions/FunctionSqid.cpp @@ -1,6 +1,6 @@ #include "config.h" -#ifdef ENABLE_SQIDS +#if USE_SQIDS #include #include diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index bef1e7b420a..5a06c363065 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1413,10 +1413,10 @@ inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuff template <> inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) { - time_t tmp = 0; - if (!tryReadDateTimeText(tmp, rb, *time_zone)) + time_t time = 0; + if (!tryReadDateTimeText(time, rb, *time_zone)) return false; - x = static_cast(tmp); + convertFromTime(x, time); return true; } @@ -1697,7 +1697,6 @@ struct ConvertThroughParsing break; } } - parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); } while (false); } @@ -3291,7 +3290,6 @@ private: { /// In case when converting to Nullable type, we apply different parsing rule, /// that will not throw an exception but return NULL in case of malformed input. - FunctionPtr function = FunctionConvertFromString::create(); return createFunctionAdaptor(function, from_type); } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index db6529da73c..37ddfd6168e 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -654,7 +654,7 @@ private: if (tuple_size < 1) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Tuple second argument of function {} must contain multiple constant string columns"); + "Tuple second argument of function {} must contain multiple constant string columns", getName()); for (size_t i = 0; i < tuple_col.tupleSize(); ++i) { diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 9468bc259e3..d0edd34e657 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -15,24 +15,13 @@ #endif #include -#if USE_BLAKE3 -# include -#endif - #include #include #include #include #if USE_SSL -# include # include -# include -#if USE_BORINGSSL -# include -#else -# include -#endif #endif #include @@ -73,7 +62,6 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_COLUMN; - extern const int SUPPORT_IS_DISABLED; } namespace impl @@ -191,6 +179,40 @@ T combineHashesFunc(T t1, T t2) } +struct SipHash64Impl +{ + static constexpr auto name = "sipHash64"; + using ReturnType = UInt64; + + static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); } + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc(h1, h2); } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct SipHash64KeyedImpl +{ + static constexpr auto name = "sipHash64Keyed"; + using ReturnType = 
UInt64; + using Key = impl::SipHashKey; + using KeyColumns = impl::SipHashKeyColumns; + + static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } + static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } + + static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } + + static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2) + { + transformEndianness(h1); + transformEndianness(h2); + const UInt64 hashes[]{h1, h2}; + return applyKeyed(key, reinterpret_cast(hashes), sizeof(hashes)); + } + + static constexpr bool use_int_hash_for_pods = false; +}; + #if USE_SSL struct HalfMD5Impl { @@ -225,159 +247,8 @@ struct HalfMD5Impl static constexpr bool use_int_hash_for_pods = false; }; - -struct MD4Impl -{ - static constexpr auto name = "MD4"; - enum { length = MD4_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - MD4_CTX ctx; - MD4_Init(&ctx); - MD4_Update(&ctx, reinterpret_cast(begin), size); - MD4_Final(out_char_data, &ctx); - } -}; - -struct MD5Impl -{ - static constexpr auto name = "MD5"; - enum { length = MD5_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - MD5_CTX ctx; - MD5_Init(&ctx); - MD5_Update(&ctx, reinterpret_cast(begin), size); - MD5_Final(out_char_data, &ctx); - } -}; - -struct SHA1Impl -{ - static constexpr auto name = "SHA1"; - enum { length = SHA_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA_CTX ctx; - SHA1_Init(&ctx); - SHA1_Update(&ctx, reinterpret_cast(begin), size); - SHA1_Final(out_char_data, &ctx); - } -}; - -struct SHA224Impl -{ - static constexpr auto name = "SHA224"; - enum { length = SHA224_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA256_CTX ctx; - SHA224_Init(&ctx); - SHA224_Update(&ctx, reinterpret_cast(begin), size); - SHA224_Final(out_char_data, &ctx); - } -}; - -struct SHA256Impl -{ - static constexpr auto name = "SHA256"; - enum { length = SHA256_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA256_CTX ctx; - SHA256_Init(&ctx); - SHA256_Update(&ctx, reinterpret_cast(begin), size); - SHA256_Final(out_char_data, &ctx); - } -}; - -struct SHA384Impl -{ - static constexpr auto name = "SHA384"; - enum { length = SHA384_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA512_CTX ctx; - SHA384_Init(&ctx); - SHA384_Update(&ctx, reinterpret_cast(begin), size); - SHA384_Final(out_char_data, &ctx); - } -}; - -struct SHA512Impl -{ - static constexpr auto name = "SHA512"; - enum { length = 64 }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA512_CTX ctx; - SHA512_Init(&ctx); - SHA512_Update(&ctx, reinterpret_cast(begin), size); - SHA512_Final(out_char_data, &ctx); - } -}; - -struct SHA512Impl256 -{ - static constexpr auto name = "SHA512_256"; - enum { length = 32 }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - /// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default - /// SSL library that we use, for S390X architecture only OpenSSL is supported. 
But the SHA512-256, SHA512_256_Init, - /// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available - /// in the current version of OpenSSL that we use which necessitates the use of the EVP interface. - auto md_ctx = EVP_MD_CTX_create(); - EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/); - EVP_DigestUpdate(md_ctx, begin, size); - EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/); - EVP_MD_CTX_destroy(md_ctx); - } -}; #endif -struct SipHash64Impl -{ - static constexpr auto name = "sipHash64"; - using ReturnType = UInt64; - - static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); } - static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc(h1, h2); } - - static constexpr bool use_int_hash_for_pods = false; -}; - -struct SipHash64KeyedImpl -{ - static constexpr auto name = "sipHash64Keyed"; - using ReturnType = UInt64; - using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } - - static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } - - static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2) - { - transformEndianness(h1); - transformEndianness(h2); - const UInt64 hashes[]{h1, h2}; - return applyKeyed(key, reinterpret_cast(hashes), sizeof(hashes)); - } - - static constexpr bool use_int_hash_for_pods = false; -}; - struct SipHash128Impl { static constexpr auto name = "sipHash128"; @@ -820,121 +691,6 @@ struct ImplXXH3 static constexpr bool use_int_hash_for_pods = false; }; -struct ImplBLAKE3 -{ - static constexpr auto name = "BLAKE3"; - enum { length = 32 }; - -#if !USE_BLAKE3 - [[noreturn]] static void apply(const char * /*begin*/, const size_t /*size*/, unsigned char * /*out_char_data*/) - { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available. 
Rust code or BLAKE3 itself may be disabled."); - } -#else - static void apply(const char * begin, const size_t size, unsigned char* out_char_data) - { - auto err_msg = blake3_apply_shim(begin, safe_cast(size), out_char_data); - if (err_msg != nullptr) - { - auto err_st = std::string(err_msg); - blake3_free_char_pointer(err_msg); - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function returned error message: {}", err_st); - } - } -#endif -}; - -template -class FunctionStringHashFixedString : public IFunction -{ -public: - static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0]->getName(), getName()); - - return std::make_shared(Impl::length); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) - { - auto col_to = ColumnFixedString::create(Impl::length); - - const typename ColumnString::Chars & data = col_from->getChars(); - const typename ColumnString::Offsets & offsets = col_from->getOffsets(); - auto & chars_to = col_to->getChars(); - const auto size = offsets.size(); - chars_to.resize(size * Impl::length); - - ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - Impl::apply( - reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset - 1, - reinterpret_cast(&chars_to[i * Impl::length])); - - current_offset = offsets[i]; - } - - return col_to; - } - else if ( - const ColumnFixedString * col_from_fix = checkAndGetColumn(arguments[0].column.get())) - { - auto col_to = ColumnFixedString::create(Impl::length); - const typename ColumnFixedString::Chars & data = col_from_fix->getChars(); - const auto size = col_from_fix->size(); - auto & chars_to = col_to->getChars(); - const auto length = col_from_fix->getN(); - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) - { - Impl::apply( - reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); - } - return col_to; - } - else if ( - const ColumnIPv6 * col_from_ip = checkAndGetColumn(arguments[0].column.get())) - { - auto col_to = ColumnFixedString::create(Impl::length); - const typename ColumnIPv6::Container & data = col_from_ip->getData(); - const auto size = col_from_ip->size(); - auto & chars_to = col_to->getChars(); - const auto length = IPV6_BINARY_LENGTH; - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) - { - Impl::apply( - reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); - } - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); - } -}; - - DECLARE_MULTITARGET_CODE( template @@ -1817,15 +1573,7 @@ using FunctionSipHash64Keyed = 
FunctionAnyHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL -using FunctionMD4 = FunctionStringHashFixedString; using FunctionHalfMD5 = FunctionAnyHash; -using FunctionMD5 = FunctionStringHashFixedString; -using FunctionSHA1 = FunctionStringHashFixedString; -using FunctionSHA224 = FunctionStringHashFixedString; -using FunctionSHA256 = FunctionStringHashFixedString; -using FunctionSHA384 = FunctionStringHashFixedString; -using FunctionSHA512 = FunctionStringHashFixedString; -using FunctionSHA512_256 = FunctionStringHashFixedString; #endif using FunctionSipHash128 = FunctionAnyHash; using FunctionSipHash128Keyed = FunctionAnyHash; @@ -1854,7 +1602,6 @@ using FunctionXxHash64 = FunctionAnyHash; using FunctionXXH3 = FunctionAnyHash; using FunctionWyHash64 = FunctionAnyHash; -using FunctionBLAKE3 = FunctionStringHashFixedString; } #ifdef __clang__ diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp index f56568b2508..38f16af0e6d 100644 --- a/src/Functions/FunctionsHashingMisc.cpp +++ b/src/Functions/FunctionsHashingMisc.cpp @@ -46,19 +46,34 @@ REGISTER_FUNCTION(Hashing) factory.registerFunction(); +#if USE_SSL + factory.registerFunction(FunctionDocumentation{ + .description = R"( +[Interprets](../..//sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input +parameters as strings and calculates the MD5 hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the +resulting string, and interprets them as [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order. The function is +relatively slow (5 million short strings per second per processor core). - factory.registerFunction( - FunctionDocumentation{ - .description=R"( -Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. -This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. -The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. -It returns a BLAKE3 hash as a byte array with type FixedString(32). -)", - .examples{ - {"hash", "SELECT hex(BLAKE3('ABC'))", ""}}, - .categories{"Hash"} - }, - FunctionFactory::CaseSensitive); +Consider using the [sipHash64](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash64) function instead. + )", + .syntax = "SELECT halfMD5(par1,par2,...,parN);", + .arguments + = {{"par1,par2,...,parN", + R"( +The function takes a variable number of input parameters. Arguments can be any of the supported data types. For some data types calculated +value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed +Tuple with the same data, Map and the corresponding Array(Tuple(key, value)) type with the same data). 
+ )"}}, + .returned_value = "The computed half MD5 hash of the given input params returned as a " + "[UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order.", + .examples + = {{"", + "SELECT HEX(halfMD5('abc', 'cde', 'fgh'));", + R"( +┌─hex(halfMD5('abc', 'cde', 'fgh'))─┐ +│ 2C9506B7374CFAF4 │ +└───────────────────────────────────┘ + )"}}}); +#endif } } diff --git a/src/Functions/FunctionsHashingSSL.cpp b/src/Functions/FunctionsHashingSSL.cpp deleted file mode 100644 index 3e109b8a11d..00000000000 --- a/src/Functions/FunctionsHashingSSL.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "config.h" - -#if USE_SSL - -#include "FunctionsHashing.h" -#include - -/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp -/// to better parallelize the build procedure and avoid MSan build failure -/// due to excessive resource consumption. - -namespace DB -{ - -REGISTER_FUNCTION(HashingSSL) -{ - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the MD4 hash of the given string.)", - .syntax = "SELECT MD4(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(MD4('abc'));", - R"( -┌─hex(MD4('abc'))──────────────────┐ -│ A448017AAF21D8525FC10AE87AA6729D │ -└──────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"( -[Interprets](../..//sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input -parameters as strings and calculates the MD5 hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the -resulting string, and interprets them as [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order. The function is -relatively slow (5 million short strings per second per processor core). - -Consider using the [sipHash64](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash64) function instead. - )", - .syntax = "SELECT halfMD5(par1,par2,...,parN);", - .arguments = {{"par1,par2,...,parN", - R"( -The function takes a variable number of input parameters. Arguments can be any of the supported data types. For some data types calculated -value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed -Tuple with the same data, Map and the corresponding Array(Tuple(key, value)) type with the same data). 
- )" - }}, - .returned_value - = "The computed half MD5 hash of the given input params returned as a [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order.", - .examples - = {{"", - "SELECT HEX(halfMD5('abc', 'cde', 'fgh'));", - R"( -┌─hex(halfMD5('abc', 'cde', 'fgh'))─┐ -│ 2C9506B7374CFAF4 │ -└───────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the MD5 hash of the given string.)", - .syntax = "SELECT MD5(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(MD5('abc'));", - R"( -┌─hex(MD5('abc'))──────────────────┐ -│ 900150983CD24FB0D6963F7D28E17F72 │ -└──────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA1 hash of the given string.)", - .syntax = "SELECT SHA1(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA1('abc'));", - R"( -┌─hex(SHA1('abc'))─────────────────────────┐ -│ A9993E364706816ABA3E25717850C26C9CD0D89D │ -└──────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA224 hash of the given string.)", - .syntax = "SELECT SHA224(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA224('abc'));", - R"( -┌─hex(SHA224('abc'))───────────────────────────────────────┐ -│ 23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7 │ -└──────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA256 hash of the given string.)", - .syntax = "SELECT SHA256(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA256('abc'));", - R"( -┌─hex(SHA256('abc'))───────────────────────────────────────────────┐ -│ BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD │ -└──────────────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA384 hash of the given string.)", - .syntax = "SELECT SHA384(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA384('abc'));", - R"( -┌─hex(SHA384('abc'))───────────────────────────────────────────────────────────────────────────────┐ -│ CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7 │ 
-└──────────────────────────────────────────────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA512 hash of the given string.)", - .syntax = "SELECT SHA512(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA512('abc'));", - R"( -┌─hex(SHA512('abc'))───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F │ -└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA512_256 hash of the given string.)", - .syntax = "SELECT SHA512_256(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA512_256('abc'));", - R"( -┌─hex(SHA512_256('abc'))───────────────────────────────────────────┐ -│ 53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23 │ -└──────────────────────────────────────────────────────────────────┘ - )" - }} - }); -} -} - -#endif diff --git a/src/Functions/FunctionsStringHashFixedString.cpp b/src/Functions/FunctionsStringHashFixedString.cpp new file mode 100644 index 00000000000..fd42a84fa26 --- /dev/null +++ b/src/Functions/FunctionsStringHashFixedString.cpp @@ -0,0 +1,440 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#if USE_BLAKE3 +# include +#endif + +#if USE_SSL +# include +# include +# include +# if USE_BORINGSSL +# include +# else +# include +# endif +#endif + +/// Instatiating only the functions that require FunctionStringHashFixedString in a separate file +/// to better parallelize the build procedure and avoid MSan build failure +/// due to excessive resource consumption. 
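For orientation: every hash in this new file plugs into the same FunctionStringHashFixedString template defined further down, so an Impl only needs a SQL-visible name, a fixed output length, and an apply() that hashes a byte range into a preallocated output buffer. A minimal sketch of that contract (ExampleHashImpl, its toy hash and the registration lines are illustrative only, not part of this patch):

    struct ExampleHashImpl
    {
        static constexpr auto name = "exampleHash";  /// name exposed to SQL
        enum { length = 16 };                        /// size of the resulting FixedString(N)

        static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
        {
            /// Toy placeholder: a real Impl runs a cryptographic hash over [begin, begin + size).
            unsigned char acc = 0;
            for (size_t i = 0; i < size; ++i)
                acc ^= static_cast<unsigned char>(begin[i]);
            for (size_t i = 0; i < static_cast<size_t>(length); ++i)
                out_char_data[i] = acc;
        }
    };

    /// Instantiation and registration then mirror the real functions below:
    ///   using FunctionExampleHash = FunctionStringHashFixedString<ExampleHashImpl>;
    ///   factory.registerFunction<FunctionExampleHash>();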
+ +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + + +#if USE_SSL + +struct MD4Impl +{ + static constexpr auto name = "MD4"; + enum + { + length = MD4_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + MD4_CTX ctx; + MD4_Init(&ctx); + MD4_Update(&ctx, reinterpret_cast(begin), size); + MD4_Final(out_char_data, &ctx); + } +}; + +struct MD5Impl +{ + static constexpr auto name = "MD5"; + enum + { + length = MD5_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + MD5_CTX ctx; + MD5_Init(&ctx); + MD5_Update(&ctx, reinterpret_cast(begin), size); + MD5_Final(out_char_data, &ctx); + } +}; + +struct SHA1Impl +{ + static constexpr auto name = "SHA1"; + enum + { + length = SHA_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA_CTX ctx; + SHA1_Init(&ctx); + SHA1_Update(&ctx, reinterpret_cast(begin), size); + SHA1_Final(out_char_data, &ctx); + } +}; + +struct SHA224Impl +{ + static constexpr auto name = "SHA224"; + enum + { + length = SHA224_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA256_CTX ctx; + SHA224_Init(&ctx); + SHA224_Update(&ctx, reinterpret_cast(begin), size); + SHA224_Final(out_char_data, &ctx); + } +}; + +struct SHA256Impl +{ + static constexpr auto name = "SHA256"; + enum + { + length = SHA256_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA256_CTX ctx; + SHA256_Init(&ctx); + SHA256_Update(&ctx, reinterpret_cast(begin), size); + SHA256_Final(out_char_data, &ctx); + } +}; + +struct SHA384Impl +{ + static constexpr auto name = "SHA384"; + enum + { + length = SHA384_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA512_CTX ctx; + SHA384_Init(&ctx); + SHA384_Update(&ctx, reinterpret_cast(begin), size); + SHA384_Final(out_char_data, &ctx); + } +}; + +struct SHA512Impl +{ + static constexpr auto name = "SHA512"; + enum + { + length = 64 + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA512_CTX ctx; + SHA512_Init(&ctx); + SHA512_Update(&ctx, reinterpret_cast(begin), size); + SHA512_Final(out_char_data, &ctx); + } +}; + +struct SHA512Impl256 +{ + static constexpr auto name = "SHA512_256"; + enum + { + length = 32 + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + /// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default + /// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init, + /// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available + /// in the current version of OpenSSL that we use which necessitates the use of the EVP interface. 
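        /// Note: EVP_MD_CTX_create()/EVP_MD_CTX_destroy() are the legacy names kept by both libraries
        /// (OpenSSL 1.1+ defines them as aliases of EVP_MD_CTX_new()/EVP_MD_CTX_free(), and BoringSSL still
        /// provides them), so the init/update/final sequence below compiles unchanged against either library.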
+ auto * md_ctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/); + EVP_DigestUpdate(md_ctx, begin, size); + EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/); + EVP_MD_CTX_destroy(md_ctx); + } +}; +#endif + +#if USE_BLAKE3 +struct ImplBLAKE3 +{ + static constexpr auto name = "BLAKE3"; + enum + { + length = 32 + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length); + auto & result = *reinterpret_cast *>(out_char_data); + + llvm::BLAKE3 hasher; + if (size > 0) + hasher.update(llvm::StringRef(begin, size)); + hasher.final(result); + } +}; + +#endif + +template +class FunctionStringHashFixedString : public IFunction +{ +public: + static constexpr auto name = Impl::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); + + return std::make_shared(Impl::length); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) + { + auto col_to = ColumnFixedString::create(Impl::length); + + const typename ColumnString::Chars & data = col_from->getChars(); + const typename ColumnString::Offsets & offsets = col_from->getOffsets(); + auto & chars_to = col_to->getChars(); + const auto size = offsets.size(); + chars_to.resize(size * Impl::length); + + ColumnString::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + Impl::apply( + reinterpret_cast(&data[current_offset]), + offsets[i] - current_offset - 1, + reinterpret_cast(&chars_to[i * Impl::length])); + + current_offset = offsets[i]; + } + + return col_to; + } + else if (const ColumnFixedString * col_from_fix = checkAndGetColumn(arguments[0].column.get())) + { + auto col_to = ColumnFixedString::create(Impl::length); + const typename ColumnFixedString::Chars & data = col_from_fix->getChars(); + const auto size = col_from_fix->size(); + auto & chars_to = col_to->getChars(); + const auto length = col_from_fix->getN(); + chars_to.resize(size * Impl::length); + for (size_t i = 0; i < size; ++i) + { + Impl::apply( + reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); + } + return col_to; + } + else if (const ColumnIPv6 * col_from_ip = checkAndGetColumn(arguments[0].column.get())) + { + auto col_to = ColumnFixedString::create(Impl::length); + const typename ColumnIPv6::Container & data = col_from_ip->getData(); + const auto size = col_from_ip->size(); + auto & chars_to = col_to->getChars(); + const auto length = IPV6_BINARY_LENGTH; + chars_to.resize(size * Impl::length); + for (size_t i = 0; i < size; ++i) + { + Impl::apply( + reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); + } + return col_to; + } + else + throw 
Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), + getName()); + } +}; + +#if USE_SSL || USE_BLAKE3 +REGISTER_FUNCTION(HashFixedStrings) +{ +# if USE_SSL + using FunctionMD4 = FunctionStringHashFixedString; + using FunctionMD5 = FunctionStringHashFixedString; + using FunctionSHA1 = FunctionStringHashFixedString; + using FunctionSHA224 = FunctionStringHashFixedString; + using FunctionSHA256 = FunctionStringHashFixedString; + using FunctionSHA384 = FunctionStringHashFixedString; + using FunctionSHA512 = FunctionStringHashFixedString; + using FunctionSHA512_256 = FunctionStringHashFixedString; + + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the MD4 hash of the given string.)", + .syntax = "SELECT MD4(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(MD4('abc'));", + R"( +┌─hex(MD4('abc'))──────────────────┐ +│ A448017AAF21D8525FC10AE87AA6729D │ +└──────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the MD5 hash of the given string.)", + .syntax = "SELECT MD5(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(MD5('abc'));", + R"( +┌─hex(MD5('abc'))──────────────────┐ +│ 900150983CD24FB0D6963F7D28E17F72 │ +└──────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA1 hash of the given string.)", + .syntax = "SELECT SHA1(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA1('abc'));", + R"( +┌─hex(SHA1('abc'))─────────────────────────┐ +│ A9993E364706816ABA3E25717850C26C9CD0D89D │ +└──────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA224 hash of the given string.)", + .syntax = "SELECT SHA224(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA224('abc'));", + R"( +┌─hex(SHA224('abc'))───────────────────────────────────────┐ +│ 23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7 │ +└──────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA256 hash of the given string.)", + .syntax = "SELECT SHA256(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA256('abc'));", + R"( +┌─hex(SHA256('abc'))───────────────────────────────────────────────┐ +│ 
BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD │ +└──────────────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA384 hash of the given string.)", + .syntax = "SELECT SHA384(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA384('abc'));", + R"( +┌─hex(SHA384('abc'))───────────────────────────────────────────────────────────────────────────────┐ +│ CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7 │ +└──────────────────────────────────────────────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA512 hash of the given string.)", + .syntax = "SELECT SHA512(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA512('abc'));", + R"( +┌─hex(SHA512('abc'))───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA512_256 hash of the given string.)", + .syntax = "SELECT SHA512_256(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA512_256('abc'));", + R"( +┌─hex(SHA512_256('abc'))───────────────────────────────────────────┐ +│ 53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23 │ +└──────────────────────────────────────────────────────────────────┘ + )"}}}); + + +# endif + +# if USE_BLAKE3 + using FunctionBLAKE3 = FunctionStringHashFixedString; + factory.registerFunction( + FunctionDocumentation{ + .description = R"( + Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. + This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. + The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. + It returns a BLAKE3 hash as a byte array with type FixedString(32). 
+ )", + .examples{{"hash", "SELECT hex(BLAKE3('ABC'))", ""}}, + .categories{"Hash"}}, + FunctionFactory::CaseSensitive); +# endif +} +#endif +} diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index f28194781c2..eb7ef4abe56 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -125,7 +125,7 @@ void GregorianDate::init(ReadBuffer & in) assertEOF(in); if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_)) - throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {})."); + throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).", year_, month_, day_of_month_); } bool GregorianDate::tryInit(ReadBuffer & in) diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 4d7d9ffb56c..b057e7fede5 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -145,13 +145,13 @@ private: } write_helper.finalize(); - /// Same as the normal `ColumnString` branch - has_column_string = true; - data[i] = &converted_col_str->getChars(); - offsets[i] = &converted_col_str->getOffsets(); - /// Keep the pointer alive converted_col_ptrs[i] = std::move(converted_col_str); + + /// Same as the normal `ColumnString` branch + has_column_string = true; + data[i] = &converted_col_ptrs[i]->getChars(); + offsets[i] = &converted_col_ptrs[i]->getOffsets(); } } diff --git a/src/Functions/format.cpp b/src/Functions/format.cpp index f1f73cfe438..41b6d65023b 100644 --- a/src/Functions/format.cpp +++ b/src/Functions/format.cpp @@ -108,13 +108,13 @@ public: } write_helper.finalize(); - /// Same as the normal `ColumnString` branch - has_column_string = true; - data[i - 1] = &converted_col_str->getChars(); - offsets[i - 1] = &converted_col_str->getOffsets(); - /// Keep the pointer alive converted_col_ptrs[i - 1] = std::move(converted_col_str); + + /// Same as the normal `ColumnString` branch + has_column_string = true; + data[i - 1] = &converted_col_ptrs[i - 1]->getChars(); + offsets[i - 1] = &converted_col_ptrs[i - 1]->getOffsets(); } } diff --git a/src/Functions/geoToS2.cpp b/src/Functions/geoToS2.cpp index 8d065b01c34..f27cd26fd9d 100644 --- a/src/Functions/geoToS2.cpp +++ b/src/Functions/geoToS2.cpp @@ -101,19 +101,35 @@ public: const Float64 lon = data_col_lon[row]; const Float64 lat = data_col_lat[row]; - if (isNaN(lon) || isNaN(lat)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments must not be NaN"); + if (isNaN(lon)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for longitude in function {}. It must not be NaN", getName()); + if (!isFinite(lon)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal argument for longitude in function {}. It must not be infinite", + getName()); - if (!(isFinite(lon) && isFinite(lat))) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments must not be infinite"); + if (isNaN(lat)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for latitude in function {}. It must not be NaN", getName()); + if (!isFinite(lat)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal argument for latitude in function {}. 
It must not be infinite", + getName()); - /// S2 acceptes point as (latitude, longitude) + /// S2 accepts point as (latitude, longitude) S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon); if (!lat_lng.is_valid()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive" - "and the longitude is between -180 and 180 degrees inclusive."); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Point ({}, {}) is invalid in function {}. For valid point the latitude is between -90 and 90 degrees inclusive" + "and the longitude is between -180 and 180 degrees inclusive.", + lon, + lat, + getName()); S2CellId id(lat_lng); diff --git a/src/Functions/h3ToString.cpp b/src/Functions/h3ToString.cpp index 897329ed9ec..f8a10d5252b 100644 --- a/src/Functions/h3ToString.cpp +++ b/src/Functions/h3ToString.cpp @@ -84,7 +84,7 @@ public: const UInt64 hindex = data[row]; if (!isValidCell(hindex)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid H3 index: {}", hindex); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid H3 index: {} in function {}", hindex, getName()); h3ToString(hindex, pos, H3_INDEX_STRING_LENGTH); diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index eba1733c683..cae3b720d8b 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -23,7 +23,7 @@ #include #include #include - +#include namespace DB { @@ -42,7 +42,8 @@ using namespace GatherUtils; /** Selection function by condition: if(cond, then, else). * cond - UInt8 * then, else - numeric types for which there is a general type, or dates, datetimes, or strings, or arrays of these types. - */ + * For better performance, try to use branch free code for numeric types(i.e. cond ? a : b --> !!cond * a + !cond * b), except floating point types because of Inf or NaN. +*/ template inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const ArrayB & b, ArrayResult & res) @@ -55,24 +56,48 @@ inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const Arr { size_t a_index = 0, b_index = 0; for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b[b_index++]); + { + if constexpr (std::is_integral_v) + { + res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b[b_index]); + a_index += !!cond[i]; + b_index += !cond[i]; + } + else + res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b[b_index++]); + } } else if (a_is_short) { size_t a_index = 0; for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b[i]); + if constexpr (std::is_integral_v) + { + res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b[i]); + a_index += !!cond[i]; + } + else + res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b[i]); } else if (b_is_short) { size_t b_index = 0; for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[b_index++]); + if constexpr (std::is_integral_v) + { + res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b[b_index]); + b_index += !cond[i]; + } + else + res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[b_index++]); } else { for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[i]); + if constexpr (std::is_integral_v) + res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b[i]); + else + res[i] = cond[i] ? 
static_cast(a[i]) : static_cast(b[i]); } } @@ -85,12 +110,21 @@ inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a, B b, Ar { size_t a_index = 0; for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b); + if constexpr (std::is_integral_v) + { + res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b); + a_index += !!cond[i]; + } + else + res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b); } else { for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a[i]) : static_cast(b); + if constexpr (std::is_integral_v) + res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b); + else + res[i] = cond[i] ? static_cast(a[i]) : static_cast(b); } } @@ -103,12 +137,21 @@ inline void fillConstantVector(const ArrayCond & cond, A a, const ArrayB & b, Ar { size_t b_index = 0; for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a) : static_cast(b[b_index++]); + if constexpr (std::is_integral_v) + { + res[i] = !!cond[i] * static_cast(a) + (!cond[i]) * static_cast(b[b_index]); + b_index += !cond[i]; + } + else + res[i] = cond[i] ? static_cast(a) : static_cast(b[b_index++]); } else { for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a) : static_cast(b[i]); + if constexpr (std::is_integral_v) + res[i] = !!cond[i] * static_cast(a) + (!cond[i]) * static_cast(b[i]); + else + res[i] = cond[i] ? static_cast(a) : static_cast(b[i]); } } diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp new file mode 100644 index 00000000000..c11409f0d1a --- /dev/null +++ b/src/Functions/punycode.cpp @@ -0,0 +1,165 @@ +#include "config.h" + +#if USE_IDNA + +#include +#include + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wnewline-eof" +#endif +# include +# include +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +struct PunycodeEncodeImpl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); + + size_t prev_offset = 0; + std::u32string value_utf32; + std::string value_puny; + for (size_t row = 0; row < rows; ++row) + { + const char * value = reinterpret_cast(&data[prev_offset]); + const size_t value_length = offsets[row] - prev_offset - 1; + + const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length); + value_utf32.resize(value_utf32_length); + ada::idna::utf8_to_utf32(value, value_length, value_utf32.data()); + + const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny); + if (!ok) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode encoding"); + + res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1); + res_offsets.push_back(res_data.size()); + + prev_offset = offsets[row]; + + value_utf32.clear(); + value_puny.clear(); /// utf32_to_punycode() appends to its output string + } + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeEncode function"); + } +}; + +struct PunycodeDecodeImpl +{ 
+ static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); + + size_t prev_offset = 0; + std::u32string value_utf32; + std::string value_utf8; + for (size_t row = 0; row < rows; ++row) + { + const char * value = reinterpret_cast(&data[prev_offset]); + const size_t value_length = offsets[row] - prev_offset - 1; + + const std::string_view value_punycode(value, value_length); + const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32); + if (!ok) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode decoding"); + + const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size()); + value_utf8.resize(utf8_length); + ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data()); + + res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1); + res_offsets.push_back(res_data.size()); + + prev_offset = offsets[row]; + + value_utf32.clear(); /// punycode_to_utf32() appends to its output string + value_utf8.clear(); + } + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeDecode function"); + } +}; + +struct NamePunycodeEncode +{ + static constexpr auto name = "punycodeEncode"; +}; + +struct NamePunycodeDecode +{ + static constexpr auto name = "punycodeDecode"; +}; + +REGISTER_FUNCTION(Punycode) +{ + factory.registerFunction>(FunctionDocumentation{ + .description=R"( +Computes a Punycode representation of a string.)", + .syntax="punycodeEncode(str)", + .arguments={{"str", "Input string"}}, + .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT punycodeEncode('München') AS puny;", + R"( +┌─puny───────┐ +│ Mnchen-3ya │ +└────────────┘ + )" + }} + }); + + factory.registerFunction>(FunctionDocumentation{ + .description=R"( +Computes a Punycode representation of a string.)", + .syntax="punycodeDecode(str)", + .arguments={{"str", "A Punycode-encoded string"}}, + .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT punycodeDecode('Mnchen-3ya') AS plain;", + R"( +┌─plain───┐ +│ München │ +└─────────┘ + )" + }} + }); +} + +} + +#endif diff --git a/src/Functions/s2CapContains.cpp b/src/Functions/s2CapContains.cpp index 9dfbc05a6a0..72e9da69a7d 100644 --- a/src/Functions/s2CapContains.cpp +++ b/src/Functions/s2CapContains.cpp @@ -131,16 +131,16 @@ public: const auto point = S2CellId(data_point[row]); if (isNaN(degrees)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan in function {}", getName()); if (std::isinf(degrees)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be infinite"); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be infinite in function {}", getName()); if (!center.is_valid()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Center is not valid"); + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Center (id {}) is not valid in function {}", data_center[row], getName()); if (!point.is_valid()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point (id {}) is not valid in function {}", data_point[row], getName()); S1Angle angle = S1Angle::Degrees(degrees); S2Cap cap(center.ToPoint(), angle); diff --git a/src/Functions/s2CellsIntersect.cpp b/src/Functions/s2CellsIntersect.cpp index 1fac5fd6e60..320f3c964a2 100644 --- a/src/Functions/s2CellsIntersect.cpp +++ b/src/Functions/s2CellsIntersect.cpp @@ -100,10 +100,12 @@ public: const UInt64 id_second = data_id_second[row]; auto first_cell = S2CellId(id_first); - auto second_cell = S2CellId(id_second); + if (!first_cell.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First cell (id {}) is not valid in function {}", id_first, getName()); - if (!first_cell.is_valid() || !second_cell.is_valid()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cell is not valid"); + auto second_cell = S2CellId(id_second); + if (!second_cell.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second cell (id {}) is not valid in function {}", id_second, getName()); dst_data.emplace_back(S2CellId(id_first).intersects(S2CellId(id_second))); } diff --git a/src/Functions/s2GetNeighbors.cpp b/src/Functions/s2GetNeighbors.cpp index b200f61315b..a6371b9ff68 100644 --- a/src/Functions/s2GetNeighbors.cpp +++ b/src/Functions/s2GetNeighbors.cpp @@ -94,7 +94,7 @@ public: S2CellId cell_id(id); if (!cell_id.is_valid()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cell is not valid"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cell (id {}) is not valid in function {}", id, getName()); S2CellId neighbors[4]; cell_id.GetEdgeNeighbors(neighbors); diff --git a/src/Functions/stringToH3.cpp b/src/Functions/stringToH3.cpp index d8728b346d0..94418efdfdf 100644 --- a/src/Functions/stringToH3.cpp +++ b/src/Functions/stringToH3.cpp @@ -88,7 +88,7 @@ private: if (res_data[row_num] == 0) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid H3 index: {}", h3index_str); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid H3 index: {} in function {}", h3index_str, name); } h3index_source.next(); diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index abad8a0727d..7a051950f52 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -137,7 +137,7 @@ AIOContext::AIOContext(unsigned int) { ctx = io_setup(); if (ctx < 0) - throw ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); } AIOContext::~AIOContext() diff --git a/src/IO/Archives/IArchiveWriter.h b/src/IO/Archives/IArchiveWriter.h index d7ff038e7bc..cccc6dc953b 100644 --- a/src/IO/Archives/IArchiveWriter.h +++ b/src/IO/Archives/IArchiveWriter.h @@ -13,7 +13,7 @@ class WriteBufferFromFileBase; class IArchiveWriter : public std::enable_shared_from_this, boost::noncopyable { public: - /// Destructors finalizes writing the archive. + /// Call finalize() before destructing IArchiveWriter. virtual ~IArchiveWriter() = default; /// Starts writing a file to the archive. The function returns a write buffer, @@ -26,6 +26,10 @@ public: /// This function should be used mostly for debugging purposes. virtual bool isWritingFile() const = 0; + /// Finalizes writing of the archive. This function must be always called at the end of writing. + /// (Unless an error appeared and the archive is in fact no longer needed.) 
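    ///
    /// A minimal usage sketch (illustrative; assumes the writer was obtained e.g. from createArchiveWriter()):
    ///     auto writer = createArchiveWriter(path_to_archive);
    ///     auto out = writer->writeFile("a.txt");
    ///     writeString("hello", *out);
    ///     out->finalize();      /// finish this file entry
    ///     writer->finalize();   /// finish the archive itself; the destructor no longer does this implicitly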
+ virtual void finalize() = 0; + static constexpr const int kDefaultCompressionLevel = -1; /// Sets compression method and level. diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index b9a696ee2e2..af6c87e8c88 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -15,86 +15,56 @@ namespace ErrorCodes extern const int CANNOT_PACK_ARCHIVE; extern const int SUPPORT_IS_DISABLED; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } -using RawHandle = zipFile; - -/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor. -class ZipArchiveWriter::HandleHolder +namespace { -public: - HandleHolder() = default; - - explicit HandleHolder(const std::shared_ptr & writer_) : writer(writer_), raw_handle(writer->acquireRawHandle()) { } - - ~HandleHolder() + void checkResultCodeImpl(int code, const String & file_name) { - if (raw_handle) + if (code >= ZIP_OK) + return; + + String message = "Code = "; + switch (code) { - try - { - int err = zipCloseFileInZip(raw_handle); - /// If err == ZIP_PARAMERROR the file is already closed. - if (err != ZIP_PARAMERROR) - checkResult(err); - } - catch (...) - { - tryLogCurrentException("ZipArchiveWriter"); - } - writer->releaseRawHandle(raw_handle); + case ZIP_ERRNO: message += "ERRNO, errno = " + errnoToString(); break; + case ZIP_PARAMERROR: message += "PARAMERROR"; break; + case ZIP_BADZIPFILE: message += "BADZIPFILE"; break; + case ZIP_INTERNALERROR: message += "INTERNALERROR"; break; + default: message += std::to_string(code); break; } + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't pack zip archive: {}, filename={}", message, quoteString(file_name)); } - - HandleHolder(HandleHolder && src) noexcept - { - *this = std::move(src); - } - - HandleHolder & operator=(HandleHolder && src) noexcept - { - writer = std::exchange(src.writer, nullptr); - raw_handle = std::exchange(src.raw_handle, nullptr); - return *this; - } - - RawHandle getRawHandle() const { return raw_handle; } - std::shared_ptr getWriter() const { return writer; } - - void checkResult(int code) const { writer->checkResult(code); } - -private: - std::shared_ptr writer; - RawHandle raw_handle = nullptr; -}; +} /// This class represents a WriteBuffer actually returned by writeFile(). 
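/// Buffered data is flushed into the current zip entry via zipWriteInFileInZip(); finalizeImpl() then
/// closes the entry (closeFile) and releases the single-writer slot (endWritingFile), so the next call
/// to writeFile() can start a new entry. The destructor only closes without throwing.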
class ZipArchiveWriter::WriteBufferFromZipArchive : public WriteBufferFromFileBase { public: - WriteBufferFromZipArchive(HandleHolder && handle_, const String & filename_) + WriteBufferFromZipArchive(std::shared_ptr archive_writer_, const String & filename_) : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) - , handle(std::move(handle_)) , filename(filename_) { - auto compress_method = handle.getWriter()->compression_method; - auto compress_level = handle.getWriter()->compression_level; + zip_handle = archive_writer_->startWritingFile(); + archive_writer = archive_writer_; + + auto compress_method = archive_writer_->getCompressionMethod(); + auto compress_level = archive_writer_->getCompressionLevel(); checkCompressionMethodIsEnabled(compress_method); const char * password_cstr = nullptr; - const String & password_str = handle.getWriter()->password; - if (!password_str.empty()) + String current_password = archive_writer_->getPassword(); + if (!current_password.empty()) { checkEncryptionIsEnabled(); - password_cstr = password_str.c_str(); + password_cstr = current_password.c_str(); } - RawHandle raw_handle = handle.getRawHandle(); - - checkResult(zipOpenNewFileInZip3_64( - raw_handle, + int code = zipOpenNewFileInZip3_64( + zip_handle, filename_.c_str(), /* zipfi= */ nullptr, /* extrafield_local= */ nullptr, @@ -110,21 +80,30 @@ public: /* strategy= */ 0, password_cstr, /* crc_for_crypting= */ 0, - /* zip64= */ true)); + /* zip64= */ true); + checkResultCode(code); } ~WriteBufferFromZipArchive() override { try { - finalize(); + closeFile(/* throw_if_error= */ false); + endWritingFile(); } catch (...) { - tryLogCurrentException("ZipArchiveWriter"); + tryLogCurrentException("WriteBufferFromZipArchive"); } } + void finalizeImpl() override + { + next(); + closeFile(/* throw_if_error= */ true); + endWritingFile(); + } + void sync() override { next(); } std::string getFileName() const override { return filename; } @@ -133,110 +112,106 @@ private: { if (!offset()) return; - RawHandle raw_handle = handle.getRawHandle(); - int code = zipWriteInFileInZip(raw_handle, working_buffer.begin(), static_cast(offset())); - checkResult(code); + chassert(zip_handle); + int code = zipWriteInFileInZip(zip_handle, working_buffer.begin(), static_cast(offset())); + checkResultCode(code); } - void checkResult(int code) const { handle.checkResult(code); } + void closeFile(bool throw_if_error) + { + if (zip_handle) + { + int code = zipCloseFileInZip(zip_handle); + zip_handle = nullptr; + if (throw_if_error) + checkResultCode(code); + } + } - HandleHolder handle; - String filename; + void endWritingFile() + { + if (auto archive_writer_ptr = archive_writer.lock()) + { + archive_writer_ptr->endWritingFile(); + archive_writer.reset(); + } + } + + void checkResultCode(int code) const { checkResultCodeImpl(code, filename); } + + std::weak_ptr archive_writer; + const String filename; + ZipHandle zip_handle; }; -namespace +/// Provides a set of functions allowing the minizip library to write its output +/// to a WriteBuffer instead of an ordinary file in the local filesystem. +class ZipArchiveWriter::StreamInfo { - /// Provides a set of functions allowing the minizip library to write its output - /// to a WriteBuffer instead of an ordinary file in the local filesystem. 
- class StreamFromWriteBuffer +public: + explicit StreamInfo(std::unique_ptr write_buffer_) + : write_buffer(std::move(write_buffer_)), start_offset(write_buffer->count()) { - public: - static RawHandle open(std::unique_ptr archive_write_buffer) - { - Opaque opaque{std::move(archive_write_buffer)}; + } - zlib_filefunc64_def func_def; - func_def.zopen64_file = &StreamFromWriteBuffer::openFileFunc; - func_def.zclose_file = &StreamFromWriteBuffer::closeFileFunc; - func_def.zread_file = &StreamFromWriteBuffer::readFileFunc; - func_def.zwrite_file = &StreamFromWriteBuffer::writeFileFunc; - func_def.zseek64_file = &StreamFromWriteBuffer::seekFunc; - func_def.ztell64_file = &StreamFromWriteBuffer::tellFunc; - func_def.zerror_file = &StreamFromWriteBuffer::testErrorFunc; - func_def.opaque = &opaque; + ~StreamInfo() = default; - return zipOpen2_64( - /* path= */ nullptr, - /* append= */ false, - /* globalcomment= */ nullptr, - &func_def); - } + ZipHandle makeZipHandle() + { + zlib_filefunc64_def func_def; + func_def.zopen64_file = &StreamInfo::openFileFunc; + func_def.zclose_file = &StreamInfo::closeFileFunc; + func_def.zread_file = &StreamInfo::readFileFunc; + func_def.zwrite_file = &StreamInfo::writeFileFunc; + func_def.zseek64_file = &StreamInfo::seekFunc; + func_def.ztell64_file = &StreamInfo::tellFunc; + func_def.zerror_file = &StreamInfo::testErrorFunc; + func_def.opaque = this; - private: - std::unique_ptr write_buffer; - UInt64 start_offset = 0; + return zipOpen2_64( + /* path= */ nullptr, + /* append= */ false, + /* globalcomment= */ nullptr, + &func_def); + } - struct Opaque - { - std::unique_ptr write_buffer; - }; + WriteBuffer & getWriteBuffer() { return *write_buffer; } - static void * openFileFunc(void * opaque, const void *, int) - { - Opaque & opq = *reinterpret_cast(opaque); - return new StreamFromWriteBuffer(std::move(opq.write_buffer)); - } +private: + /// We do nothing in openFileFunc() and in closeFileFunc() because we already have `write_buffer` (file is already opened). 
+ static void * openFileFunc(void * opaque, const void *, int) { return opaque; } + static int closeFileFunc(void *, void *) { return ZIP_OK; } - explicit StreamFromWriteBuffer(std::unique_ptr write_buffer_) - : write_buffer(std::move(write_buffer_)), start_offset(write_buffer->count()) {} + static unsigned long writeFileFunc(void * opaque, void *, const void * buf, unsigned long size) // NOLINT(google-runtime-int) + { + auto * stream_info = reinterpret_cast(opaque); + stream_info->write_buffer->write(reinterpret_cast(buf), size); + return size; + } - ~StreamFromWriteBuffer() - { - write_buffer->finalize(); - } + static int testErrorFunc(void *, void *) { return ZIP_OK; } - static int closeFileFunc(void *, void * stream) - { - delete reinterpret_cast(stream); - return ZIP_OK; - } + static ZPOS64_T tellFunc(void * opaque, void *) + { + auto * stream_info = reinterpret_cast(opaque); + auto pos = stream_info->write_buffer->count() - stream_info->start_offset; + return pos; + } - static StreamFromWriteBuffer & get(void * ptr) - { - return *reinterpret_cast(ptr); - } + static long seekFunc(void *, void *, ZPOS64_T, int) // NOLINT(google-runtime-int) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StreamInfo::seek() is not implemented"); + } - static unsigned long writeFileFunc(void *, void * stream, const void * buf, unsigned long size) // NOLINT(google-runtime-int) - { - auto & strm = get(stream); - strm.write_buffer->write(reinterpret_cast(buf), size); - return size; - } + static unsigned long readFileFunc(void *, void *, void *, unsigned long) // NOLINT(google-runtime-int) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StreamInfo::readFile() is not implemented"); + } - static int testErrorFunc(void *, void *) - { - return ZIP_OK; - } - - static ZPOS64_T tellFunc(void *, void * stream) - { - auto & strm = get(stream); - auto pos = strm.write_buffer->count() - strm.start_offset; - return pos; - } - - static long seekFunc(void *, void *, ZPOS64_T, int) // NOLINT(google-runtime-int) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromWriteBuffer::seek must not be called"); - } - - static unsigned long readFileFunc(void *, void *, void *, unsigned long) // NOLINT(google-runtime-int) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromWriteBuffer::readFile must not be called"); - } - }; -} + std::unique_ptr write_buffer; + UInt64 start_offset; +}; ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_) @@ -248,21 +223,42 @@ ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_ : path_to_archive(path_to_archive_), compression_method(MZ_COMPRESS_METHOD_DEFLATE) { if (archive_write_buffer_) - handle = StreamFromWriteBuffer::open(std::move(archive_write_buffer_)); + { + stream_info = std::make_unique(std::move(archive_write_buffer_)); + zip_handle = stream_info->makeZipHandle(); + } else - handle = zipOpen64(path_to_archive.c_str(), /* append= */ false); - if (!handle) - throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive)); + { + zip_handle = zipOpen64(path_to_archive.c_str(), /* append= */ false); + } + if (!zip_handle) + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive)); } ZipArchiveWriter::~ZipArchiveWriter() { - if (handle) + if (!finalized) + { + /// It is totally OK to destroy instance without finalization when an exception occurs. 
+ /// However it is suspicious to destroy instance without finalization at the green path. + if (!std::uncaught_exceptions() && std::current_exception() == nullptr) + { + Poco::Logger * log = &Poco::Logger::get("ZipArchiveWriter"); + LOG_ERROR(log, + "ZipArchiveWriter is not finalized when destructor is called. " + "The zip archive might not be written at all or might be truncated. " + "Stack trace: {}", StackTrace().toString()); + chassert(false && "ZipArchiveWriter is not finalized in destructor."); + } + } + + if (zip_handle) { try { - checkResult(zipClose(handle, /* global_comment= */ nullptr)); + zipCloseFileInZip(zip_handle); + zipClose(zip_handle, /* global_comment= */ nullptr); } catch (...) { @@ -273,13 +269,38 @@ ZipArchiveWriter::~ZipArchiveWriter() std::unique_ptr ZipArchiveWriter::writeFile(const String & filename) { - return std::make_unique(acquireHandle(), filename); + return std::make_unique(std::static_pointer_cast(shared_from_this()), filename); } bool ZipArchiveWriter::isWritingFile() const { std::lock_guard lock{mutex}; - return !handle; + return is_writing_file; +} + +void ZipArchiveWriter::finalize() +{ + std::lock_guard lock{mutex}; + if (finalized) + return; + + if (is_writing_file) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ZipArchiveWriter::finalize() is called in the middle of writing a file into the zip archive. That's not allowed"); + + if (zip_handle) + { + int code = zipClose(zip_handle, /* global_comment= */ nullptr); + zip_handle = nullptr; + checkResultCode(code); + } + + if (stream_info) + { + stream_info->getWriteBuffer().finalize(); + stream_info.reset(); + } + + finalized = true; } void ZipArchiveWriter::setCompression(const String & compression_method_, int compression_level_) @@ -289,12 +310,30 @@ void ZipArchiveWriter::setCompression(const String & compression_method_, int co compression_level = compression_level_; } +int ZipArchiveWriter::getCompressionMethod() const +{ + std::lock_guard lock{mutex}; + return compression_method; +} + +int ZipArchiveWriter::getCompressionLevel() const +{ + std::lock_guard lock{mutex}; + return compression_level; +} + void ZipArchiveWriter::setPassword(const String & password_) { std::lock_guard lock{mutex}; password = password_; } +String ZipArchiveWriter::getPassword() const +{ + std::lock_guard lock{mutex}; + return password; +} + int ZipArchiveWriter::compressionMethodToInt(const String & compression_method_) { if (compression_method_.empty()) @@ -361,45 +400,24 @@ void ZipArchiveWriter::checkEncryptionIsEnabled() #endif } -ZipArchiveWriter::HandleHolder ZipArchiveWriter::acquireHandle() -{ - return HandleHolder{std::static_pointer_cast(shared_from_this())}; -} - -RawHandle ZipArchiveWriter::acquireRawHandle() +ZipArchiveWriter::ZipHandle ZipArchiveWriter::startWritingFile() { std::lock_guard lock{mutex}; - if (!handle) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot have more than one write buffer while writing a zip archive"); - return std::exchange(handle, nullptr); + if (is_writing_file) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot write two files to a zip archive in parallel"); + is_writing_file = true; + return zip_handle; } -void ZipArchiveWriter::releaseRawHandle(RawHandle raw_handle_) +void ZipArchiveWriter::endWritingFile() { std::lock_guard lock{mutex}; - handle = raw_handle_; + is_writing_file = false; } -void ZipArchiveWriter::checkResult(int code) const +void ZipArchiveWriter::checkResultCode(int code) const { - if (code >= ZIP_OK) - return; - - String message = "Code = "; - 
switch (code) - { - case ZIP_ERRNO: message += "ERRNO, errno = " + errnoToString(); break; - case ZIP_PARAMERROR: message += "PARAMERROR"; break; - case ZIP_BADZIPFILE: message += "BADZIPFILE"; break; - case ZIP_INTERNALERROR: message += "INTERNALERROR"; break; - default: message += std::to_string(code); break; - } - showError(message); -} - -void ZipArchiveWriter::showError(const String & message) const -{ - throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't pack zip archive {}: {}", quoteString(path_to_archive), message); + checkResultCodeImpl(code, path_to_archive); } } diff --git a/src/IO/Archives/ZipArchiveWriter.h b/src/IO/Archives/ZipArchiveWriter.h index a54130556b3..891da1a2e75 100644 --- a/src/IO/Archives/ZipArchiveWriter.h +++ b/src/IO/Archives/ZipArchiveWriter.h @@ -4,6 +4,7 @@ #if USE_MINIZIP #include +#include #include @@ -22,7 +23,7 @@ public: /// Constructs an archive that will be written by using a specified `archive_write_buffer_`. ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr archive_write_buffer_); - /// Destructors finalizes writing the archive. + /// Call finalize() before destructing IArchiveWriter. ~ZipArchiveWriter() override; /// Starts writing a file to the archive. The function returns a write buffer, @@ -35,6 +36,10 @@ public: /// This function should be used mostly for debugging purposes. bool isWritingFile() const override; + /// Finalizes writing of the archive. This function must be always called at the end of writing. + /// (Unless an error appeared and the archive is in fact no longer needed.) + void finalize() override; + /// Supported compression methods. static constexpr const char kStore[] = "store"; static constexpr const char kDeflate[] = "deflate"; @@ -68,22 +73,27 @@ public: static void checkEncryptionIsEnabled(); private: + class StreamInfo; + using ZipHandle = void *; class WriteBufferFromZipArchive; - class HandleHolder; - using RawHandle = void *; - HandleHolder acquireHandle(); - RawHandle acquireRawHandle(); - void releaseRawHandle(RawHandle raw_handle_); + int getCompressionMethod() const; + int getCompressionLevel() const; + String getPassword() const; - void checkResult(int code) const; - [[noreturn]] void showError(const String & message) const; + ZipHandle startWritingFile(); + void endWritingFile(); + + void checkResultCode(int code) const; const String path_to_archive; - int compression_method; /// By default the compression method is "deflate". - int compression_level = kDefaultCompressionLevel; - String password; - RawHandle handle = nullptr; + std::unique_ptr TSA_GUARDED_BY(mutex) stream_info; + int compression_method TSA_GUARDED_BY(mutex); /// By default the compression method is "deflate". 
+ int compression_level TSA_GUARDED_BY(mutex) = kDefaultCompressionLevel; + String password TSA_GUARDED_BY(mutex); + ZipHandle zip_handle TSA_GUARDED_BY(mutex) = nullptr; + bool is_writing_file TSA_GUARDED_BY(mutex) = false; + bool finalized TSA_GUARDED_BY(mutex) = false; mutable std::mutex mutex; }; diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index 970afc75ec3..88073a72d78 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -133,6 +133,22 @@ ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings settings.http_receive_timeout); } +ConnectionTimeouts ConnectionTimeouts::getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings) +{ + auto timeouts = getHTTPTimeouts(user_settings, server_settings.keep_alive_timeout); + + if (server_settings.replicated_fetches_http_connection_timeout.changed) + timeouts.connection_timeout = server_settings.replicated_fetches_http_connection_timeout; + + if (server_settings.replicated_fetches_http_send_timeout.changed) + timeouts.send_timeout = server_settings.replicated_fetches_http_send_timeout; + + if (server_settings.replicated_fetches_http_receive_timeout.changed) + timeouts.receive_timeout = server_settings.replicated_fetches_http_receive_timeout; + + return timeouts; +} + class SendReceiveTimeoutsForFirstAttempt { private: diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index aabebdb836d..42c4312d1d8 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -68,6 +69,8 @@ struct ConnectionTimeouts static ConnectionTimeouts getTCPTimeoutsWithFailover(const Settings & settings); static ConnectionTimeouts getHTTPTimeouts(const Settings & settings, Poco::Timespan http_keep_alive_timeout); + static ConnectionTimeouts getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings); + ConnectionTimeouts getAdaptiveTimeouts(const String & method, bool first_attempt, bool first_byte) const; }; diff --git a/src/IO/ReadBufferFromIStream.cpp b/src/IO/ReadBufferFromIStream.cpp index e0c966fb700..3b3bdb5c564 100644 --- a/src/IO/ReadBufferFromIStream.cpp +++ b/src/IO/ReadBufferFromIStream.cpp @@ -34,6 +34,11 @@ bool ReadBufferFromIStream::nextImpl() ReadBufferFromIStream::ReadBufferFromIStream(std::istream & istr_, size_t size) : BufferWithOwnMemory(size), istr(istr_) { + /// - badbit will be set if some exception is thrown from the ios implementation + /// - failbit can be set when, for instance, read() reads less data than requested, so we + /// cannot set it, since we are requesting to read more data than the + /// buffer has now.
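// Editorial aside (illustrative, not part of the patch): the standard-library behaviour this
// relies on, as a small self-contained example (not ClickHouse code):
//
//     #include <sstream>
//
//     void example()
//     {
//         std::istringstream ss("abc");
//         ss.exceptions(std::ios::badbit);   // only badbit raises std::ios_base::failure
//         char buf[16];
//         ss.read(buf, sizeof(buf));         // short read sets eofbit/failbit but does not throw
//     }                                      // a streambuf error during read() would set badbit and rethrow,
//                                            // so real I/O errors are no longer silently swallowed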
+ istr.exceptions(std::ios::badbit); } } diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 36cac929e3f..619fd40edc3 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -196,7 +196,7 @@ bool ReadBufferFromS3::nextImpl() next_result = impl->next(); break; } - catch (Exception & e) + catch (Poco::Exception & e) { if (!processException(e, getPosition(), attempt) || last_attempt) throw; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 4c8a6cb020a..a8a31d82e56 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -120,6 +120,7 @@ struct ReadSettings size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 1600; bool http_skip_not_found_url_for_globs = true; + bool http_make_head_request = true; /// Monitoring bool for_object_storage = false; // to choose which profile events should be incremented diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 6dd6269e16f..297d73303c0 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -808,6 +808,11 @@ std::optional ReadWriteBufferFromHTTPBase::tryGetLa template HTTPFileInfo ReadWriteBufferFromHTTPBase::getFileInfo() { + /// May be disabled in case the user knows in advance that the server doesn't support HEAD requests. + /// Allows to avoid making unnecessary requests in such cases. + if (!settings.http_make_head_request) + return HTTPFileInfo{}; + Poco::Net::HTTPResponse response; try { @@ -920,13 +925,12 @@ PooledReadWriteBufferFromHTTP::PooledReadWriteBufferFromHTTP( Poco::URI uri_, const std::string & method_, OutStreamCallback out_stream_callback_, - const ConnectionTimeouts & timeouts_, const Poco::Net::HTTPBasicCredentials & credentials_, size_t buffer_size_, const UInt64 max_redirects, - size_t max_connections_per_endpoint) + PooledSessionFactoryPtr session_factory) : Parent( - std::make_shared(uri_, max_redirects, std::make_shared(timeouts_, max_connections_per_endpoint)), + std::make_shared(uri_, max_redirects, session_factory), uri_, credentials_, method_, diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 8f0e2388e5b..29c0804bb28 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -265,6 +265,8 @@ private: size_t per_endpoint_pool_size; }; +using PooledSessionFactoryPtr = std::shared_ptr; + class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>> { using SessionType = UpdatableSession; @@ -273,13 +275,12 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase public: explicit PooledReadWriteBufferFromHTTP( Poco::URI uri_, - const std::string & method_ = {}, - OutStreamCallback out_stream_callback_ = {}, - const ConnectionTimeouts & timeouts_ = {}, - const Poco::Net::HTTPBasicCredentials & credentials_ = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const UInt64 max_redirects = 0, - size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT); + const std::string & method_, + OutStreamCallback out_stream_callback_, + const Poco::Net::HTTPBasicCredentials & credentials_, + size_t buffer_size_, + const UInt64 max_redirects, + PooledSessionFactoryPtr session_factory); }; diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 7658ea5941c..f1fe1af4117 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -574,6 +574,9 @@ Client::doRequest(RequestType & request, RequestFn request_fn) const if 
(!new_uri) return result; + if (initial_endpoint.substr(11) == "amazonaws.com") // Check if user didn't mention any region + new_uri->addRegionToURI(request.getRegionOverride()); + const auto & current_uri_override = request.getURIOverride(); /// we already tried with this URI if (current_uri_override && current_uri_override->uri == new_uri->uri) diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index eae45491fe6..bfb94a5a67e 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -58,6 +58,11 @@ public: return BaseRequest::GetChecksumAlgorithmName(); } + std::string getRegionOverride() const + { + return region_override; + } + void overrideRegion(std::string region) const { region_override = std::move(region); diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index e05e0882329..e990875dd2f 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -146,6 +146,12 @@ URI::URI(const std::string & uri_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI."); } +void URI::addRegionToURI(const std::string ®ion) +{ + if (auto pos = endpoint.find("amazonaws.com"); pos != std::string::npos) + endpoint = endpoint.substr(0, pos) + region + "." + endpoint.substr(pos); +} + void URI::validateBucket(const String & bucket, const Poco::URI & uri) { /// S3 specification requires at least 3 and at most 63 characters in bucket name. diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index f8f40cf9108..2873728bc78 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -32,6 +32,7 @@ struct URI URI() = default; explicit URI(const std::string & uri_); + void addRegionToURI(const std::string & region); static void validateBucket(const std::string & bucket, const Poco::URI & uri); }; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index ffd6b6d711f..96ad6413ef5 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -109,6 +109,8 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const { auto access_key_id = config.getString(config_elem + ".access_key_id", ""); auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); + auto session_token = config.getString(config_elem + ".session_token", ""); + auto region = config.getString(config_elem + ".region", ""); auto server_side_encryption_customer_key_base64 = config.getString(config_elem + ".server_side_encryption_customer_key_base64", ""); @@ -133,7 +135,7 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const return AuthSettings { - std::move(access_key_id), std::move(secret_access_key), + std::move(access_key_id), std::move(secret_access_key), std::move(session_token), std::move(region), std::move(server_side_encryption_customer_key_base64), std::move(sse_kms_config), @@ -155,6 +157,8 @@ void AuthSettings::updateFrom(const AuthSettings & from) access_key_id = from.access_key_id; if (!from.secret_access_key.empty()) secret_access_key = from.secret_access_key; + if (!from.session_token.empty()) + session_token = from.session_token; headers = from.headers; region = from.region; diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 8c45c1c34a7..ebfc07a3976 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -80,6 +80,7 @@ struct AuthSettings std::string access_key_id; std::string secret_access_key; + std::string session_token; std::string region; std::string server_side_encryption_customer_key_base64; ServerSideEncryptionKMSConfig server_side_encryption_kms_config; diff --git a/src/IO/SharedThreadPools.cpp 
b/src/IO/SharedThreadPools.cpp index 6af5aab7a38..c8506663bc8 100644 --- a/src/IO/SharedThreadPools.cpp +++ b/src/IO/SharedThreadPools.cpp @@ -66,6 +66,8 @@ void StaticThreadPool::reloadConfiguration(size_t max_threads, size_t max_free_t if (!instance) throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); + std::lock_guard lock(mutex); + instance->setMaxThreads(turbo_mode_enabled > 0 ? max_threads_turbo : max_threads); instance->setMaxFreeThreads(max_free_threads); instance->setQueueSize(queue_size); diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp index b48955c25e7..37fbdff901a 100644 --- a/src/IO/tests/gtest_archive_reader_and_writer.cpp +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -102,7 +102,8 @@ TEST_P(ArchiveReaderAndWriterTest, EmptyArchive) { /// Make an archive. { - createArchiveWriter(getPathToArchive()); + auto writer = createArchiveWriter(getPathToArchive()); + writer->finalize(); } /// The created archive can be found in the local filesystem. @@ -132,7 +133,9 @@ TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive) { auto out = writer->writeFile("a.txt"); writeString(contents, *out); + out->finalize(); } + writer->finalize(); } /// Read the archive. @@ -198,11 +201,14 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive) { auto out = writer->writeFile("a.txt"); writeString(a_contents, *out); + out->finalize(); } { auto out = writer->writeFile("b/c.txt"); writeString(c_contents, *out); + out->finalize(); } + writer->finalize(); } /// Read the archive. @@ -281,11 +287,14 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory) { auto out = writer->writeFile("a.txt"); writeString(a_contents, *out); + out->finalize(); } { auto out = writer->writeFile("b.txt"); writeString(b_contents, *out); + out->finalize(); } + writer->finalize(); } /// The created archive is really in memory. @@ -335,7 +344,9 @@ TEST_P(ArchiveReaderAndWriterTest, Password) { auto out = writer->writeFile("a.txt"); writeString(contents, *out); + out->finalize(); } + writer->finalize(); } /// Read the archive. 
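The test changes above illustrate the new contract for archive writers: every write buffer returned by writeFile() must be finalized, and the writer itself must now be finalized explicitly rather than relying on its destructor. A minimal sketch of the intended call sequence, assuming the createArchiveWriter() factory used by these tests (header paths and error handling here are illustrative, not part of the diff):

#include <IO/Archives/createArchiveWriter.h>
#include <IO/WriteHelpers.h>

void writeSmallZip(const std::string & path_to_archive)
{
    auto writer = DB::createArchiveWriter(path_to_archive);   // picks ZipArchiveWriter for *.zip
    {
        auto out = writer->writeFile("a.txt");                // one entry at a time; parallel writes throw
        DB::writeString("hello", *out);
        out->finalize();                                      // finish the entry
    }
    writer->finalize();                                       // mandatory now; a missing call is logged (and chasserted) in the destructor
}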
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 6be9f6c803f..827914eaefe 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1414,7 +1414,10 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool set_key = right_in_operand->getTreeHash(/*ignore_aliases=*/ true); if (auto set = data.prepared_sets->findSubquery(set_key)) + { + set->markAsINSubquery(); return set; + } FutureSetPtr external_table_set; @@ -1460,7 +1463,8 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool interpreter->buildQueryPlan(*source); } - return data.prepared_sets->addFromSubquery(set_key, std::move(source), nullptr, std::move(external_table_set), data.getContext()->getSettingsRef()); + return data.prepared_sets->addFromSubquery( + set_key, std::move(source), nullptr, std::move(external_table_set), data.getContext()->getSettingsRef(), /*in_subquery=*/true); } else { diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 08d159b42ca..27639c4b813 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -148,8 +148,6 @@ private: { if (table_expression.database_and_table_name) tryVisit(table_expression.database_and_table_name); - else if (table_expression.subquery) - tryVisit(table_expression.subquery); } void visit(const ASTTableIdentifier & identifier, ASTPtr & ast) const @@ -167,11 +165,6 @@ private: ast = qualified_identifier; } - void visit(ASTSubquery & subquery, ASTPtr &) const - { - tryVisit(subquery.children[0]); - } - void visit(ASTFunction & function, ASTPtr &) const { bool is_operator_in = functionIsInOrGlobalInOperator(function.name); diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index 4953a2140ea..e49bb28bd45 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -27,6 +27,7 @@ NamesAndTypesList BackupLogElement::getNamesAndTypes() {"event_time_microseconds", std::make_shared(6)}, {"id", std::make_shared()}, {"name", std::make_shared()}, + {"base_backup_name", std::make_shared()}, {"status", std::make_shared(getBackupStatusEnumValues())}, {"error", std::make_shared()}, {"start_time", std::make_shared()}, @@ -49,6 +50,7 @@ void BackupLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(event_time_usec); columns[i++]->insert(info.id); columns[i++]->insert(info.name); + columns[i++]->insert(info.base_backup_name); columns[i++]->insert(static_cast(info.status)); columns[i++]->insert(info.error_message); columns[i++]->insert(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 1dd94719ae4..6b627cb07b3 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -707,7 +707,7 @@ KeyMetadata::iterator FileCache::addFileSegment( stash_records.emplace( stash_key, stash->queue->add(locked_key.getKeyMetadata(), offset, 0, *lock)); - if (stash->queue->getElementsCount(*lock) > stash->queue->getElementsLimit()) + if (stash->queue->getElementsCount(*lock) > stash->queue->getElementsLimit(*lock)) stash->queue->pop(*lock); result_state = FileSegment::State::DETACHED; @@ -748,7 +748,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa LOG_TEST( log, "Trying to reserve space ({} bytes) for {}:{}, current usage 
{}/{}", size, file_segment.key(), file_segment.offset(), - main_priority->getSize(cache_lock), main_priority->getSizeLimit()); + main_priority->getSize(cache_lock), main_priority->getSizeLimit(cache_lock)); /// In case of per query cache limit (by default disabled), we add/remove entries from both /// (main_priority and query_priority) priority queues, but iterate entries in order of query_priority, @@ -760,7 +760,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa { query_priority = &query_context->getPriority(); - const bool query_limit_exceeded = query_priority->getSize(cache_lock) + size > query_priority->getSizeLimit(); + const bool query_limit_exceeded = query_priority->getSize(cache_lock) + size > query_priority->getSizeLimit(cache_lock); if (query_limit_exceeded && !query_context->recacheOnFileCacheQueryLimitExceeded()) { LOG_TEST(log, "Query limit exceeded, space reservation failed, " @@ -771,7 +771,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa LOG_TEST( log, "Using query limit, current usage: {}/{} (while reserving for {}:{})", - query_priority->getSize(cache_lock), query_priority->getSizeLimit(), + query_priority->getSize(cache_lock), query_priority->getSizeLimit(cache_lock), file_segment.key(), file_segment.offset()); } @@ -1066,9 +1066,11 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) bool limits_satisfied; IFileCachePriority::IteratorPtr cache_it; + size_t size_limit = 0; { auto lock = lockCache(); + size_limit = main_priority->getSizeLimit(lock); limits_satisfied = main_priority->canFit(size, lock); if (limits_satisfied) @@ -1118,7 +1120,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) log, "Cache capacity changed (max size: {}), " "cached file `{}` does not fit in cache anymore (size: {})", - main_priority->getSizeLimit(), offset_it->path().string(), size); + size_limit, offset_it->path().string(), size); fs::remove(offset_it->path()); } @@ -1222,7 +1224,8 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, std::lock_guard lock(apply_settings_mutex); - if (metadata.setBackgroundDownloadQueueSizeLimit(new_settings.background_download_queue_size_limit)) + if (new_settings.background_download_queue_size_limit != actual_settings.background_download_queue_size_limit + && metadata.setBackgroundDownloadQueueSizeLimit(new_settings.background_download_queue_size_limit)) { LOG_INFO(log, "Changed background_download_queue_size from {} to {}", actual_settings.background_download_queue_size_limit, @@ -1231,24 +1234,57 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, actual_settings.background_download_queue_size_limit = new_settings.background_download_queue_size_limit; } - bool updated; - try + if (new_settings.background_download_threads != actual_settings.background_download_threads) { - updated = metadata.setBackgroundDownloadThreads(new_settings.background_download_threads); - } - catch (...) - { - actual_settings.background_download_threads = metadata.getBackgroundDownloadThreads(); - throw; + bool updated = false; + try + { + updated = metadata.setBackgroundDownloadThreads(new_settings.background_download_threads); + } + catch (...) 
+ { + actual_settings.background_download_threads = metadata.getBackgroundDownloadThreads(); + throw; + } + + if (updated) + { + LOG_INFO(log, "Changed background_download_threads from {} to {}", + actual_settings.background_download_threads, + new_settings.background_download_threads); + + actual_settings.background_download_threads = new_settings.background_download_threads; + } } - if (updated) - { - LOG_INFO(log, "Changed background_download_threads from {} to {}", - actual_settings.background_download_threads, - new_settings.background_download_threads); - actual_settings.background_download_threads = new_settings.background_download_threads; + if (new_settings.max_size != actual_settings.max_size + || new_settings.max_elements != actual_settings.max_elements) + { + auto cache_lock = lockCache(); + + bool updated = false; + try + { + updated = main_priority->modifySizeLimits( + new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); + } + catch (...) + { + actual_settings.max_size = main_priority->getSizeLimit(cache_lock); + actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); + throw; + } + + if (updated) + { + LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); + + actual_settings.max_size = main_priority->getSizeLimit(cache_lock); + actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); + } } } diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index 9ba96de26dc..84eafde9afd 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -25,6 +25,12 @@ FileCacheSettings FileCacheFactory::FileCacheData::getSettings() const return settings; } +void FileCacheFactory::FileCacheData::setSettings(const FileCacheSettings & new_settings) +{ + std::lock_guard lock(settings_mutex); + settings = new_settings; +} + FileCacheFactory & FileCacheFactory::instance() { static FileCacheFactory ret; @@ -100,21 +106,23 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig FileCacheSettings new_settings; new_settings.loadFromConfig(config, cache_info->config_path); - FileCacheSettings old_settings; - { - std::lock_guard lock(cache_info->settings_mutex); - if (new_settings == cache_info->settings) - continue; + FileCacheSettings old_settings = cache_info->getSettings(); + if (old_settings == new_settings) + continue; - old_settings = cache_info->settings; + try + { + cache_info->cache->applySettingsIfPossible(new_settings, old_settings); + } + catch (...) + { + /// Settings changes could be partially applied in case of exception, + /// make sure cache_info->settings show correct state of applied settings. 
+ cache_info->setSettings(old_settings); + throw; } - cache_info->cache->applySettingsIfPossible(new_settings, old_settings); - - { - std::lock_guard lock(cache_info->settings_mutex); - cache_info->settings = old_settings; - } + cache_info->setSettings(old_settings); } } diff --git a/src/Interpreters/Cache/FileCacheFactory.h b/src/Interpreters/Cache/FileCacheFactory.h index 2148e520fd1..c60b247005b 100644 --- a/src/Interpreters/Cache/FileCacheFactory.h +++ b/src/Interpreters/Cache/FileCacheFactory.h @@ -24,6 +24,8 @@ public: FileCacheSettings getSettings() const; + void setSettings(const FileCacheSettings & new_settings); + const FileCachePtr cache; const std::string config_path; diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 3a74bbfd460..c07f6fb9fb4 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -55,9 +55,9 @@ public: virtual ~IFileCachePriority() = default; - size_t getElementsLimit() const { return max_elements; } + size_t getElementsLimit(const CacheGuard::Lock &) const { return max_elements; } - size_t getSizeLimit() const { return max_size; } + size_t getSizeLimit(const CacheGuard::Lock &) const { return max_size; } virtual size_t getSize(const CacheGuard::Lock &) const = 0; @@ -86,9 +86,11 @@ public: FinalizeEvictionFunc & finalize_eviction_func, const CacheGuard::Lock &) = 0; + virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) = 0; + protected: - const size_t max_size = 0; - const size_t max_elements = 0; + size_t max_size = 0; + size_t max_elements = 0; }; } diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 1cba68eb405..2155d2e1f8b 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -16,6 +16,9 @@ namespace ProfileEvents { extern const Event FilesystemCacheEvictionSkippedFileSegments; extern const Event FilesystemCacheEvictionTries; + extern const Event FilesystemCacheEvictMicroseconds; + extern const Event FilesystemCacheEvictedBytes; + extern const Event FilesystemCacheEvictedFileSegments; } namespace DB @@ -36,7 +39,7 @@ IFileCachePriority::IteratorPtr LRUFileCachePriority::add( /// NOLINT return std::make_shared(add(Entry(key_metadata->key, offset, size, key_metadata), lock)); } -LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock & lock) { if (entry.size == 0) { @@ -59,7 +62,7 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, cons } #endif - const auto & size_limit = getSizeLimit(); + const auto & size_limit = getSizeLimit(lock); if (size_limit && current_size + entry.size > size_limit) { throw Exception( @@ -288,6 +291,51 @@ std::vector LRUFileCachePriority::dump(const CacheGuard::Lock & return res; } +bool LRUFileCachePriority::modifySizeLimits( + size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CacheGuard::Lock & lock) +{ + if (max_size == max_size_ && max_elements == max_elements_) + return false; /// Nothing to change. 
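// Editorial aside (not part of the patch): this function is the core of the new
// "resize the file cache without a restart" path. A rough sketch of how it is reached,
// using only names that appear elsewhere in this diff:
//
//     FileCacheFactory::updateSettingsFromConfig(config)         // server config reload
//         -> FileCache::applySettingsIfPossible(new, actual)     // compares new vs. actual settings
//             -> lockCache()                                     // single cache-wide lock
//             -> main_priority->modifySizeLimits(max_size, max_elements, slru_size_ratio, lock)
//
// Shrinking evicts releasable segments (segments in use are skipped) until the new limits
// are satisfied and only then updates max_size/max_elements; growing just updates the limits.
// On exception the caller re-reads the queue's current limits into actual_settings, so the
// recorded settings never drift from what is actually applied.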
+ + auto check_limits_satisfied = [&]() + { + return (max_size_ == 0 || current_size <= max_size_) + && (max_elements_ == 0 || current_elements_num <= max_elements_); + }; + + if (check_limits_satisfied()) + { + max_size = max_size_; + max_elements = max_elements_; + return true; + } + + auto iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) + { + chassert(segment_metadata->file_segment->assertCorrectness()); + + if (!segment_metadata->releasable()) + return IterationResult::CONTINUE; + + auto segment = segment_metadata->file_segment; + locked_key.removeFileSegment(segment->offset(), segment->lock()); + + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->getDownloadedSize()); + return IterationResult::REMOVE_AND_CONTINUE; + }; + + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); + iterate( + [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) + { return check_limits_satisfied() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); }, + lock); + + max_size = max_size_; + max_elements = max_elements_; + return true; +} + void LRUFileCachePriority::LRUIterator::remove(const CacheGuard::Lock & lock) { assertValid(); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 15d7b728f94..ed6ec405395 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -48,6 +48,8 @@ public: void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); } + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + private: void updateElementsCount(int64_t num); void updateSize(int64_t size); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 6e3a97fb8d1..231c3d4a8d6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -687,7 +687,7 @@ void CacheMetadata::startup() download_threads.emplace_back(std::make_shared()); download_threads.back()->thread = std::make_unique([this, thread = download_threads.back()] { downloadThreadFunc(thread->stop_flag); }); } - cleanup_thread = std::make_unique(std::function{ [this]{ cleanupThreadFunc(); }}); + cleanup_thread = std::make_unique([this]{ cleanupThreadFunc(); }); } void CacheMetadata::shutdown() @@ -714,10 +714,10 @@ bool CacheMetadata::setBackgroundDownloadThreads(size_t threads_num) if (threads_num == download_threads_num) return false; + SCOPE_EXIT({ download_threads_num = download_threads.size(); }); + if (threads_num > download_threads_num) { - SCOPE_EXIT({ download_threads_num = download_threads.size(); }); - size_t add_threads = threads_num - download_threads_num; for (size_t i = 0; i < add_threads; ++i) { @@ -745,7 +745,6 @@ bool CacheMetadata::setBackgroundDownloadThreads(size_t threads_num) } download_queue->cv.notify_all(); - SCOPE_EXIT({ download_threads_num = download_threads.size(); }); for (size_t i = 0; i < remove_threads; ++i) { diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index d778ae456ff..8b46712731c 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -21,14 +21,15 @@ namespace 
SLRUFileCachePriority::SLRUFileCachePriority( size_t max_size_, size_t max_elements_, - double size_ratio) + double size_ratio_) : IFileCachePriority(max_size_, max_elements_) + , size_ratio(size_ratio_) , protected_queue(LRUFileCachePriority(getRatio(max_size_, size_ratio), getRatio(max_elements_, size_ratio))) , probationary_queue(LRUFileCachePriority(getRatio(max_size_, 1 - size_ratio), getRatio(max_elements_, 1 - size_ratio))) { LOG_DEBUG( log, "Using probationary queue size: {}, protected queue size: {}", - probationary_queue.getSizeLimit(), protected_queue.getSizeLimit()); + probationary_queue.max_size, protected_queue.max_elements); } size_t SLRUFileCachePriority::getSize(const CacheGuard::Lock & lock) const @@ -151,7 +152,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach /// Entry is in probationary queue. /// We need to move it to protected queue. const size_t size = iterator.getEntry().size; - if (size > protected_queue.getSizeLimit()) + if (size > protected_queue.getSizeLimit(lock)) { /// Entry size is bigger than the whole protected queue limit. /// This is only possible if protected_queue_size_limit is less than max_file_segment_size, @@ -235,6 +236,21 @@ void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock) probationary_queue.shuffle(lock); } +bool SLRUFileCachePriority::modifySizeLimits( + size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock & lock) +{ + if (max_size == max_size_ && max_elements == max_elements_ && size_ratio == size_ratio_) + return false; /// Nothing to change. + + protected_queue.modifySizeLimits(getRatio(max_size_, size_ratio_), getRatio(max_elements_, size_ratio_), 0, lock); + probationary_queue.modifySizeLimits(getRatio(max_size_, 1 - size_ratio_), getRatio(max_elements_, 1 - size_ratio_), 0, lock); + + max_size = max_size_; + max_elements = max_elements_; + size_ratio = size_ratio_; + return true; +} + SLRUFileCachePriority::SLRUIterator::SLRUIterator( SLRUFileCachePriority * cache_priority_, LRUFileCachePriority::LRUIterator && lru_iterator_, diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index 1e14562a71e..b9ea246bc83 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -18,7 +18,7 @@ private: public: class SLRUIterator; - SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio); + SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio_); size_t getSize(const CacheGuard::Lock & lock) const override; @@ -45,7 +45,10 @@ public: std::vector dump(const CacheGuard::Lock &) override; + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + private: + double size_ratio; LRUFileCachePriority protected_queue; LRUFileCachePriority probationary_queue; Poco::Logger * log = &Poco::Logger::get("SLRUFileCachePriority"); diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 3a634f08b83..18f7280dd19 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -135,7 +135,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, } /// disable parallel replicas if cluster contains only shards with 1 replica - if (context->canUseParallelReplicas()) + if (context->canUseTaskBasedParallelReplicas()) { bool 
disable_parallel_replicas = true; for (const auto & shard : cluster.getShardsInfo()) @@ -265,7 +265,7 @@ void executeQuery( // decide for each shard if parallel reading from replicas should be enabled // according to settings and number of replicas declared per shard const auto & addresses = cluster->getShardsAddresses().at(i); - bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseParallelReplicas(); + bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseTaskBasedParallelReplicas(); stream_factory.createForShard( shard_info, @@ -382,7 +382,6 @@ void executeQueryWithParallelReplicas( shard_num = column->getUInt(0); } - size_t all_replicas_count = 0; ClusterPtr new_cluster; /// if got valid shard_num from query initiator, then parallel replicas scope is the specified shard /// shards are numbered in order of appearance in the cluster config @@ -406,16 +405,14 @@ void executeQueryWithParallelReplicas( // shard_num is 1-based, but getClusterWithSingleShard expects 0-based index auto single_shard_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1); // convert cluster to representation expected by parallel replicas - new_cluster = single_shard_cluster->getClusterWithReplicasAsShards(settings); + new_cluster = single_shard_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); } else { - new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings); + new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); } - all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), new_cluster->getShardCount()); - - auto coordinator = std::make_shared(all_replicas_count); + auto coordinator = std::make_shared(new_cluster->getShardCount()); auto external_tables = new_context->getExternalTables(); auto read_from_remote = std::make_unique( query_ast, diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a24f947b13c..25146ebc10d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -361,6 +362,8 @@ struct ContextSharedPart : boost::noncopyable OrdinaryBackgroundExecutorPtr moves_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex); + /// The global pool of HTTP sessions for background fetches. + PooledSessionFactoryPtr fetches_session_factory TSA_GUARDED_BY(background_executors_mutex); RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex); /// Allowed URL from config.xml HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex); /// Forbidden HTTP headers from config.xml @@ -4050,7 +4053,8 @@ void Context::checkCanBeDropped(const String & database, const String & table, c "2. File '{}' intended to force DROP {}\n" "How to fix this:\n" "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n" - "2. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n" + "2. Either pass a bigger (or set to zero) max_[table/partition]_size_to_drop through query settings\n" + "3. 
Either create forcing file {} and make sure that ClickHouse has write permission for it.\n" "Example:\nsudo touch '{}' && sudo chmod 666 '{}'", backQuoteIfNeed(database), backQuoteIfNeed(table), size_str, max_size_to_drop_str, @@ -4078,6 +4082,10 @@ void Context::checkTableCanBeDropped(const String & database, const String & tab checkCanBeDropped(database, table, table_size, max_table_size_to_drop); } +void Context::checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size, const size_t & max_table_size_to_drop) const +{ + checkCanBeDropped(database, table, table_size, max_table_size_to_drop); +} void Context::setMaxPartitionSizeToDrop(size_t max_size) { @@ -4097,6 +4105,10 @@ void Context::checkPartitionCanBeDropped(const String & database, const String & checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); } +void Context::checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const +{ + checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); +} InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional & format_settings, const std::optional max_parsing_threads) const { @@ -4527,7 +4539,7 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w if (!storage_id) { if (exception) - exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Both table name and UUID are empty"); + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Both table name and UUID are empty")); return storage_id; } @@ -4588,7 +4600,7 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w if (current_database.empty()) { if (exception) - exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Default database is not selected"); + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Default database is not selected")); return StorageID::createEmpty(); } storage_id.database_name = current_database; @@ -4815,6 +4827,11 @@ void Context::initializeBackgroundExecutorsIfNeeded() ); LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size); + auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getServerSettings(), getSettingsRef()); + /// The number of background fetches is limited by the number of threads in the background thread pool. + /// It doesn't make any sense to limit the number of connections per host any further. 
+ shared->fetches_session_factory = std::make_shared(timeouts, background_fetches_pool_size); + shared->fetch_executor = std::make_shared ( "Fetch", @@ -4868,6 +4885,12 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const return shared->common_executor; } +PooledSessionFactoryPtr Context::getCommonFetchesSessionFactory() const +{ + SharedLockGuard lock(shared->background_executors_mutex); + return shared->fetches_session_factory; +} + IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const { callOnce(shared->readers_initialized, [&] { @@ -4973,6 +4996,7 @@ ReadSettings Context::getReadSettings() const res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms; res.http_retry_max_backoff_ms = settings.http_retry_max_backoff_ms; res.http_skip_not_found_url_for_globs = settings.http_skip_not_found_url_for_globs; + res.http_make_head_request = settings.http_make_head_request; res.mmap_cache = getMMappedFileCache().get(); @@ -5017,7 +5041,7 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const return SAMPLE_KEY; } -bool Context::canUseParallelReplicas() const +bool Context::canUseTaskBasedParallelReplicas() const { const auto & settings_ref = getSettingsRef(); return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1; @@ -5025,12 +5049,12 @@ bool Context::canUseParallelReplicas() const bool Context::canUseParallelReplicasOnInitiator() const { - return canUseParallelReplicas() && !getClientInfo().collaborate_with_initiator; + return canUseTaskBasedParallelReplicas() && !getClientInfo().collaborate_with_initiator; } bool Context::canUseParallelReplicasOnFollower() const { - return canUseParallelReplicas() && getClientInfo().collaborate_with_initiator; + return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator; } void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index be14dd7e6e6..39d2212ce80 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -202,6 +202,9 @@ using TemporaryDataOnDiskScopePtr = std::shared_ptr; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; +class PooledSessionFactory; +using PooledSessionFactoryPtr = std::shared_ptr; + class SessionTracker; struct ServerSettings; @@ -1081,11 +1084,13 @@ public: void setMaxTableSizeToDrop(size_t max_size); size_t getMaxTableSizeToDrop() const; void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size) const; + void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size, const size_t & max_table_size_to_drop) const; /// Prevents DROP PARTITION if its size is greater than max_size (50GB by default, max_size=0 turn off this check) void setMaxPartitionSizeToDrop(size_t max_size); size_t getMaxPartitionSizeToDrop() const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size) const; + void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const; /// Lets you select the compression codec according to the conditions described in the configuration file. 
std::shared_ptr chooseCompressionCodec(size_t part_size, double part_size_ratio) const; @@ -1211,6 +1216,7 @@ public: OrdinaryBackgroundExecutorPtr getMovesExecutor() const; OrdinaryBackgroundExecutorPtr getFetchesExecutor() const; OrdinaryBackgroundExecutorPtr getCommonExecutor() const; + PooledSessionFactoryPtr getCommonFetchesSessionFactory() const; IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING @@ -1228,7 +1234,7 @@ public: WriteSettings getWriteSettings() const; /** There are multiple conditions that have to be met to be able to use parallel replicas */ - bool canUseParallelReplicas() const; + bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index c388ade9062..fc1975e8c86 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -331,7 +331,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( if (!table_id) { if (exception) - exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Cannot find table: StorageID is empty"); + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot find table: StorageID is empty")); return {}; } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3b389dcf61e..969c57535f9 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -858,11 +859,8 @@ const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const bool ExpressionAnalyzer::isRemoteStorage() const { - const Settings & csettings = getContext()->getSettingsRef(); // Consider any storage used in parallel replicas as remote, so the query is executed in multiple servers - const bool enable_parallel_processing_of_joins - = csettings.max_parallel_replicas > 1 && csettings.allow_experimental_parallel_reading_from_replicas > 0; - return syntax->is_remote_storage || enable_parallel_processing_of_joins; + return syntax->is_remote_storage || getContext()->canUseTaskBasedParallelReplicas(); } const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const @@ -954,6 +952,9 @@ static std::shared_ptr tryCreateJoin( std::unique_ptr & joined_plan, ContextPtr context) { + if (analyzed_join->kind() == JoinKind::Paste) + return std::make_shared(analyzed_join, right_sample_block); + if (algorithm == JoinAlgorithm::DIRECT || algorithm == JoinAlgorithm::DEFAULT) { JoinPtr direct_join = tryKeyValueJoin(analyzed_join, right_sample_block); diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index ddeb4bcef2c..db93467c0a4 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -155,6 +155,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); + if (!getContext()->getSettings().allow_experimental_statistic && ( command_ast->type == ASTAlterCommand::ADD_STATISTIC || command_ast->type == ASTAlterCommand::DROP_STATISTIC || @@ -407,6 +408,7 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS break; } case ASTAlterCommand::DELETE: + case ASTAlterCommand::APPLY_DELETED_MASK: case ASTAlterCommand::DROP_PARTITION: case 
ASTAlterCommand::DROP_DETACHED_PARTITION: { diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index b155476fd79..ea59115b077 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ namespace ErrorCodes BlockIO InterpreterCreateFunctionQuery::execute() { + FunctionNameNormalizer().visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTCreateFunctionQuery & create_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 3b47a002e50..ed29c82a0f0 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ namespace ErrorCodes BlockIO InterpreterCreateIndexQuery::execute() { + FunctionNameNormalizer().visit(query_ptr.get()); auto current_context = getContext(); const auto & create_index = query_ptr->as(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index bf07f4ed3ee..1eadb325e95 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1270,6 +1270,23 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (database && database->getEngineName() == "Replicated" && create.select) + { + bool is_storage_replicated = false; + if (create.storage && create.storage->engine) + { + const auto & storage_name = create.storage->engine->name; + if (storage_name.starts_with("Replicated") || storage_name.starts_with("Shared")) + is_storage_replicated = true; + } + + const bool allow_create_select_for_replicated = create.isView() || create.is_create_empty || !is_storage_replicated; + if (!allow_create_select_for_replicated) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "CREATE AS SELECT is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + } + if (need_add_to_database && database && database->shouldReplicateQuery(getContext(), query_ptr)) { chassert(!ddl_guard); @@ -1730,7 +1747,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont throw Exception(ErrorCodes::INCORRECT_QUERY, "Seems like cluster is configured for cross-replication, " - "but zookeeper_path for ReplicatedMergeTree is not specified or contains {uuid} macro. " + "but zookeeper_path for ReplicatedMergeTree is not specified or contains {{uuid}} macro. " "It's not supported for cross replication, because tables must have different UUIDs. 
" "Please specify unique zookeeper_path explicitly."); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 67245438156..8543b5ca552 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -481,7 +481,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Check support for FINAL for parallel replicas bool is_query_with_final = isQueryWithFinal(query_info); - if (is_query_with_final && settings.allow_experimental_parallel_reading_from_replicas > 0) + if (is_query_with_final && context->canUseTaskBasedParallelReplicas()) { if (settings.allow_experimental_parallel_reading_from_replicas == 1) { @@ -870,7 +870,38 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() ASTSelectQuery & query = getSelectQuery(); /// While only_analyze we don't know anything about parts, so any decision about how many parallel replicas to use would be wrong - if (!storage || options.only_analyze || !context->canUseParallelReplicasOnInitiator()) + if (!storage || !context->canUseParallelReplicasOnInitiator()) + return false; + + /// check if IN operator with subquery is present in the query + /// if so, disable parallel replicas + if (query_analyzer->getPreparedSets()->hasSubqueries()) + { + bool in_subqueries = false; + const auto & sets = query_analyzer->getPreparedSets(); + const auto subqueries = sets->getSubqueries(); + for (const auto & subquery : subqueries) + { + if (subquery->isINSubquery()) + { + in_subqueries = true; + break; + } + } + + if (in_subqueries) + { + if (settings.allow_experimental_parallel_reading_from_replicas == 2) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); + + context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + context->setSetting("max_parallel_replicas", UInt64{0}); + LOG_DEBUG(log, "Disabling parallel replicas to execute a query with IN with subquery"); + return true; + } + } + + if (options.only_analyze) return false; if (getTrivialCount(0).has_value()) @@ -1698,7 +1729,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional
prepared_pipe) - if (expressions.join->
pipelineType() == JoinPipelineType::YShaped) + if (expressions.join->pipelineType() == JoinPipelineType::YShaped && expressions.join->getTableJoin().kind() != JoinKind::Paste) { const auto & table_join = expressions.join->getTableJoin(); const auto & join_clause = table_join.getOnlyClause(); @@ -2942,6 +2973,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), window.full_sort_description, + window.partition_by, 0 /* LIMIT */, sort_settings, settings.optimize_sorting_by_input_stream_properties); diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 52b2744b64d..fc040e2af04 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -964,7 +964,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) if (auto * replicated = dynamic_cast(database.get())) { check_not_local_replica(replicated, query); - DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica, /*throw_if_noop*/ true); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database {} is not Replicated, cannot drop replica", query.getDatabase()); @@ -989,7 +989,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) } check_not_local_replica(replicated, query); - DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica, /*throw_if_noop*/ false); LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(database->getDatabaseName())); } } @@ -1002,7 +1002,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) if (auto * replicated = dynamic_cast(elem.second.get())) check_not_local_replica(replicated, query); - DatabaseReplicated::dropReplica(nullptr, query.replica_zk_path, query.shard, query.replica); + DatabaseReplicated::dropReplica(nullptr, query.replica_zk_path, query.shard, query.replica, /*throw_if_noop*/ true); LOG_INFO(log, "Dropped replica {} of Replicated database with path {}", query.replica, query.replica_zk_path); } else diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index e1af704a358..8865c47a785 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -61,7 +61,7 @@ QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() storage = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (!storage) - throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist.", table_id.getNameForLogs()); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist.", table_id.getNameForLogs()); auto storage_name = storage->getName(); if (storage_name == "LiveView" diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 949a97d5748..6bd202a1dd7 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -345,27 +345,6 @@ ColumnRawPtrs getRawPointers(const Columns & columns) return ptrs; } -void convertToFullColumnsInplace(Block & block) -{ - for (size_t i = 0; i < block.columns(); ++i) - { - auto & col = block.getByPosition(i); - col.column = 
recursiveRemoveLowCardinality(recursiveRemoveSparse(col.column)); - col.type = recursiveRemoveLowCardinality(col.type); - } -} - -void convertToFullColumnsInplace(Block & block, const Names & names, bool change_type) -{ - for (const String & column_name : names) - { - auto & col = block.getByName(column_name); - col.column = recursiveRemoveLowCardinality(recursiveRemoveSparse(col.column)); - if (change_type) - col.type = recursiveRemoveLowCardinality(col.type); - } -} - void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys) { for (const auto & column_name : lowcard_keys) @@ -495,8 +474,8 @@ void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count) bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type) { - DataTypePtr left_type_strict = removeNullable(recursiveRemoveLowCardinality(left_type)); - DataTypePtr right_type_strict = removeNullable(recursiveRemoveLowCardinality(right_type)); + DataTypePtr left_type_strict = removeNullable(removeLowCardinality(left_type)); + DataTypePtr right_type_strict = removeNullable(removeLowCardinality(right_type)); return left_type_strict->equals(*right_type_strict); } diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h index a88fca02bd8..ff48f34d82c 100644 --- a/src/Interpreters/JoinUtils.h +++ b/src/Interpreters/JoinUtils.h @@ -71,8 +71,6 @@ ColumnPtr materializeColumn(const Block & block, const String & name); Columns materializeColumns(const Block & block, const Names & names); ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); ColumnRawPtrs getRawPointers(const Columns & columns); -void convertToFullColumnsInplace(Block & block); -void convertToFullColumnsInplace(Block & block, const Names & names, bool change_type = true); void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys); ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right); diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 30c62386ca3..f0427b5a6ca 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -138,6 +138,9 @@ Block extractMinMax(const Block & block, const Block & keys) } min_max.setColumns(std::move(columns)); + + for (auto & column : min_max) + column.column = column.column->convertToFullColumnIfLowCardinality(); return min_max; } @@ -224,6 +227,16 @@ public: MergeJoinCursor(const Block & block, const SortDescription & desc_) : impl(block, desc_) { + for (auto *& column : impl.sort_columns) + { + const auto * lowcard_column = typeid_cast(column); + if (lowcard_column) + { + auto & new_col = column_holder.emplace_back(lowcard_column->convertToFullColumn()); + column = new_col.get(); + } + } + /// SortCursorImpl can work with permutation, but MergeJoinCursor can't. 
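The MergeJoin changes above (the implementation behind join_algorithm = 'partial_merge') drop the old convertToFullColumnsInplace round-trips: blocks keep their LowCardinality columns, and only the cursor's sort columns and the min/max block are converted to full columns where the algorithm actually needs that. A minimal query that goes through this path, using nothing beyond the built-in numbers() table function:

    SET join_algorithm = 'partial_merge';
    SELECT count()
    FROM (SELECT toLowCardinality(toString(number)) AS k FROM numbers(10)) AS l
    INNER JOIN (SELECT toLowCardinality(toString(number)) AS k FROM numbers(10)) AS r
    USING (k);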
if (impl.permutation) throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: MergeJoinCursor doesn't support permutation"); @@ -287,6 +300,7 @@ public: private: SortCursorImpl impl; + Columns column_holder; bool has_left_nullable = false; bool has_right_nullable = false; @@ -537,9 +551,6 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right lowcard_right_keys.push_back(right_key); } - JoinCommon::convertToFullColumnsInplace(right_table_keys); - JoinCommon::convertToFullColumnsInplace(right_sample_block, key_names_right); - for (const auto & column : right_table_keys) if (required_right_keys.contains(column.name)) right_columns_to_add.insert(ColumnWithTypeAndName{nullptr, column.type, column.name}); @@ -662,9 +673,7 @@ bool MergeJoin::saveRightBlock(Block && block) Block MergeJoin::modifyRightBlock(const Block & src_block) const { - Block block = materializeBlock(src_block); - JoinCommon::convertToFullColumnsInplace(block, table_join->getOnlyClause().key_names_right); - return block; + return materializeBlock(src_block); } bool MergeJoin::addBlockToJoin(const Block & src_block, bool) @@ -705,8 +714,6 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) lowcard_keys.push_back(column_name); } - JoinCommon::convertToFullColumnsInplace(block, key_names_left, false); - sortBlock(block, left_sort_description); } @@ -739,8 +746,6 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) if (needConditionJoinColumn()) block.erase(deriveTempName(mask_column_name_left, JoinTableSide::Left)); - - JoinCommon::restoreLowCardinalityInplace(block, lowcard_keys); } template diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 8e56b08f1ed..bf50766c165 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -153,19 +154,29 @@ bool isStorageTouchedByMutations( return false; bool all_commands_can_be_skipped = true; - for (const MutationCommand & command : commands) + for (const auto & command : commands) { - if (!command.predicate) /// The command touches all rows. - return true; - - if (command.partition) + if (command.type == MutationCommand::APPLY_DELETED_MASK) { - const String partition_id = storage.getPartitionIDFromQuery(command.partition, context); - if (partition_id == source_part->info.partition_id) - all_commands_can_be_skipped = false; + if (source_part->hasLightweightDelete()) + return true; } else - all_commands_can_be_skipped = false; + { + if (!command.predicate) /// The command touches all rows. + return true; + + if (command.partition) + { + const String partition_id = storage.getPartitionIDFromQuery(command.partition, context); + if (partition_id == source_part->info.partition_id) + all_commands_can_be_skipped = false; + } + else + { + all_commands_can_be_skipped = false; + } + } } if (all_commands_can_be_skipped) @@ -211,7 +222,6 @@ bool isStorageTouchedByMutations( return count != 0; } - ASTPtr getPartitionAndPredicateExpressionForMutationCommand( const MutationCommand & command, const StoragePtr & storage, @@ -244,6 +254,32 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( return command.predicate ? 
command.predicate->clone() : partition_predicate_as_ast_func; } + +MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) +{ + if (command.type != MutationCommand::APPLY_DELETED_MASK) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected APPLY_DELETED_MASK mutation command, got: {}", magic_enum::enum_name(command.type)); + + auto alter_command = std::make_shared(); + alter_command->type = ASTAlterCommand::DELETE; + alter_command->partition = command.partition; + + auto row_exists_predicate = makeASTFunction("equals", + std::make_shared(LightweightDeleteDescription::FILTER_COLUMN.name), + std::make_shared(Field(0))); + + if (command.predicate) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Mutation command APPLY DELETED MASK does not support WHERE clause"); + + alter_command->predicate = row_exists_predicate; + + auto mutation_command = MutationCommand::parse(alter_command.get()); + if (!mutation_command) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to parse command {}. It's a bug", queryToString(alter_command)); + + return *mutation_command; +} + MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(storage_)) { } @@ -517,15 +553,18 @@ void MutationsInterpreter::prepare(bool dry_run) NameSet updated_columns; bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); - for (const MutationCommand & command : commands) + for (auto & command : commands) { - if (command.type == MutationCommand::Type::UPDATE - || command.type == MutationCommand::Type::DELETE) + if (command.type == MutationCommand::Type::APPLY_DELETED_MASK) + command = createCommandToApplyDeletedMask(command); + + if (command.type == MutationCommand::Type::UPDATE || command.type == MutationCommand::Type::DELETE) materialize_ttl_recalculate_only = false; for (const auto & [name, _] : command.column_to_update_expression) { - if (!available_columns_set.contains(name) && name != LightweightDeleteDescription::FILTER_COLUMN.name + if (!available_columns_set.contains(name) + && name != LightweightDeleteDescription::FILTER_COLUMN.name && name != BlockNumberColumn::name) throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Column {} is updated but not requested to read", name); @@ -574,7 +613,7 @@ void MutationsInterpreter::prepare(bool dry_run) std::vector read_columns; /// First, break a sequence of commands into stages. 
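createCommandToApplyDeletedMask above lowers the new APPLY DELETED MASK command into an ordinary DELETE mutation with the predicate _row_exists = 0; the matching ALTER syntax is added in ParserAlterQuery.cpp further down. A sketch of the intended usage, with a hypothetical table name and partition value:

    ALTER TABLE hits APPLY DELETED MASK;
    ALTER TABLE hits APPLY DELETED MASK IN PARTITION 202312;
    -- Rewrites the affected parts so that rows already hidden by a lightweight
    -- DELETE (rows with _row_exists = 0) are physically removed.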
- for (auto & command : commands) + for (const auto & command : commands) { // we can return deleted rows only if it's the only present command assert(command.type == MutationCommand::DELETE || command.type == MutationCommand::UPDATE || !settings.return_mutated_rows); @@ -585,7 +624,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (stages.empty() || !stages.back().column_to_updated.empty()) stages.emplace_back(context); - auto predicate = getPartitionAndPredicateExpressionForMutationCommand(command); + auto predicate = getPartitionAndPredicateExpressionForMutationCommand(command); if (!settings.return_mutated_rows) predicate = makeASTFunction("isZeroOrNull", predicate); @@ -605,16 +644,12 @@ void MutationsInterpreter::prepare(bool dry_run) NameSet affected_materialized; - for (const auto & kv : command.column_to_update_expression) + for (const auto & [column_name, update_expr] : command.column_to_update_expression) { - const String & column = kv.first; - - auto materialized_it = column_to_affected_materialized.find(column); + auto materialized_it = column_to_affected_materialized.find(column_name); if (materialized_it != column_to_affected_materialized.end()) - { - for (const String & mat_column : materialized_it->second) + for (const auto & mat_column : materialized_it->second) affected_materialized.emplace(mat_column); - } /// When doing UPDATE column = expression WHERE condition /// we will replace column to the result of the following expression: @@ -627,33 +662,39 @@ void MutationsInterpreter::prepare(bool dry_run) /// Outer CAST is added just in case if we don't trust the returning type of 'if'. DataTypePtr type; - if (auto physical_column = columns_desc.tryGetPhysical(column)) + if (auto physical_column = columns_desc.tryGetPhysical(column_name)) + { type = physical_column->type; - else if (column == LightweightDeleteDescription::FILTER_COLUMN.name) + } + else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + { type = LightweightDeleteDescription::FILTER_COLUMN.type; - else if (column == BlockNumberColumn::name) + deleted_mask_updated = true; + } + else if (column_name == BlockNumberColumn::name) + { type = BlockNumberColumn::type; + } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column); + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column_name); + } auto type_literal = std::make_shared(type->getName()); - - const auto & update_expr = kv.second; - ASTPtr condition = getPartitionAndPredicateExpressionForMutationCommand(command); /// And new check validateNestedArraySizes for Nested subcolumns - if (isArray(type) && !Nested::splitName(column).second.empty()) + if (isArray(type) && !Nested::splitName(column_name).second.empty()) { std::shared_ptr function = nullptr; - auto nested_update_exprs = getExpressionsOfUpdatedNestedSubcolumns(column, all_columns, command.column_to_update_expression); + auto nested_update_exprs = getExpressionsOfUpdatedNestedSubcolumns(column_name, all_columns, command.column_to_update_expression); if (!nested_update_exprs) { function = makeASTFunction("validateNestedArraySizes", condition, update_expr->clone(), - std::make_shared(column)); + std::make_shared(column_name)); condition = makeASTFunction("and", condition, function); } else if (nested_update_exprs->size() > 1) @@ -675,10 +716,10 @@ void MutationsInterpreter::prepare(bool dry_run) makeASTFunction("_CAST", update_expr->clone(), type_literal), - std::make_shared(column)), + std::make_shared(column_name)), type_literal); - 
stages.back().column_to_updated.emplace(column, updated_column); + stages.back().column_to_updated.emplace(column_name, updated_column); if (condition && settings.return_mutated_rows) stages.back().filters.push_back(condition); @@ -986,27 +1027,42 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s auto all_columns = storage_snapshot->getColumnsByNames(options, available_columns); /// Add _row_exists column if it is present in the part - if (source.hasLightweightDeleteMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); + if (source.hasLightweightDeleteMask() || deleted_mask_updated) + all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); + bool has_filters = false; /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) { if (settings.return_all_columns || !prepared_stages[i].filters.empty()) { for (const auto & column : all_columns) + { + if (column.name == LightweightDeleteDescription::FILTER_COLUMN.name && !deleted_mask_updated) + continue; + prepared_stages[i].output_columns.insert(column.name); - continue; + } + + has_filters = true; + settings.apply_deleted_mask = true; } + else + { + if (i > 0) + prepared_stages[i].output_columns = prepared_stages[i - 1].output_columns; - if (i > 0) - prepared_stages[i].output_columns = prepared_stages[i - 1].output_columns; + /// Make sure that all updated columns are included into output_columns set. + /// This is important for a "hidden" column like _row_exists gets because it is a virtual column + /// and so it is not in the list of AllPhysical columns. + for (const auto & [column_name, _] : prepared_stages[i].column_to_updated) + { + if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && has_filters && !deleted_mask_updated) + continue; - /// Make sure that all updated columns are included into output_columns set. - /// This is important for a "hidden" column like _row_exists gets because it is a virtual column - /// and so it is not in the list of AllPhysical columns. - for (const auto & kv : prepared_stages[i].column_to_updated) - prepared_stages[i].output_columns.insert(kv.first); + prepared_stages[i].output_columns.insert(column_name); + } + } } /// Now, calculate `expressions_chain` for each stage except the first. @@ -1024,7 +1080,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s all_asts->children.push_back(kv.second); /// Add all output columns to prevent ExpressionAnalyzer from deleting them from source columns. 
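The deleted_mask_updated bookkeeping above tracks whether a mutation stage itself rewrites the virtual _row_exists column, which in turn decides when that column has to be read and propagated between stages. Roughly the kind of sequence this has to get right, with hypothetical table and column names:

    DELETE FROM hits WHERE event_date < '2023-01-01';    -- lightweight delete: marks rows via _row_exists = 0
    ALTER TABLE hits UPDATE status = 'archived' WHERE 1; -- a later mutation must still honour (or rewrite) that mask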
- for (const String & column : stage.output_columns) + for (const auto & column : stage.output_columns) all_asts->children.push_back(std::make_shared(column)); /// Executing scalar subquery on that stage can lead to deadlock @@ -1081,7 +1137,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s actions_chain.getLastStep().addRequiredOutput(name); actions_chain.getLastActions(); - actions_chain.finalize(); if (i) @@ -1224,7 +1279,7 @@ void MutationsInterpreter::Source::read( VirtualColumns virtual_columns(std::move(required_columns), part); - createMergeTreeSequentialSource( + createReadFromPartStep( plan, *data, storage_snapshot, part, std::move(virtual_columns.columns_to_read), apply_deleted_mask_, filter, context_, diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 1372ea77f4f..eda94190185 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -32,6 +32,8 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( ContextPtr context ); +MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command); + /// Create an input stream that will read data from storage and apply mutation commands (UPDATEs, DELETEs, MATERIALIZEs) /// to this data. class MutationsInterpreter @@ -213,6 +215,7 @@ private: std::unique_ptr updated_header; std::vector stages; bool is_prepared = false; /// Has the sequence of stages been prepared. + bool deleted_mask_updated = false; NameSet materialized_indices; NameSet materialized_projections; diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 973c5260ea1..338775bfb0c 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -245,6 +245,7 @@ bool PartLog::addNewParts( elem.part_type = part->getType(); elem.bytes_compressed_on_disk = part->getBytesOnDisk(); + elem.bytes_uncompressed = part->getBytesUncompressedOnDisk(); elem.rows = part->rows_count; elem.error = static_cast(execution_status.code); diff --git a/src/Interpreters/PasteJoin.h b/src/Interpreters/PasteJoin.h new file mode 100644 index 00000000000..df7bb2f280c --- /dev/null +++ b/src/Interpreters/PasteJoin.h @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + +/// Dummy class, actual joining is done by MergeTransform +class PasteJoin : public IJoin +{ +public: + explicit PasteJoin(std::shared_ptr table_join_, const Block & right_sample_block_) + : table_join(table_join_) + , right_sample_block(right_sample_block_) + { + LOG_TRACE(&Poco::Logger::get("PasteJoin"), "Will use paste join"); + } + + std::string getName() const override { return "PasteJoin"; } + const TableJoin & getTableJoin() const override { return *table_join; } + + bool addBlockToJoin(const Block & /* block */, bool /* check_limits */) override + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "PasteJoin::addBlockToJoin should not be called"); + } + + static bool isSupported(const std::shared_ptr & table_join) + { + bool support_storage = !table_join->isSpecialStorage(); + + /// Key column can change nullability and it's not handled on type conversion stage, so algorithm should be aware of it + bool support_using = !table_join->hasUsing(); + + bool check_strictness = table_join->strictness() == JoinStrictness::All; + + bool if_has_keys = table_join->getClauses().empty(); + + return support_using 
&& support_storage && check_strictness && if_has_keys; + } + + void checkTypesOfKeys(const Block & /*left_block*/) const override + { + if (!isSupported(table_join)) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "PasteJoin doesn't support specified query"); + } + + /// Used just to get result header + void joinBlock(Block & block, std::shared_ptr & /* not_processed */) override + { + for (const auto & col : right_sample_block) + block.insert(col); + block = materializeBlock(block).cloneEmpty(); + } + + void setTotals(const Block & block) override { totals = block; } + const Block & getTotals() const override { return totals; } + + size_t getTotalRowCount() const override + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "PasteJoin::getTotalRowCount should not be called"); + } + + size_t getTotalByteCount() const override + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "PasteJoin::getTotalByteCount should not be called"); + } + + bool alwaysReturnsEmptySet() const override { return false; } + + IBlocksStreamPtr + getNonJoinedBlocks(const Block & /* left_sample_block */, const Block & /* result_sample_block */, UInt64 /* max_block_size */) const override + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "PasteJoin::getNonJoinedBlocks should not be called"); + } + + /// Left and right streams have the same priority and are processed simultaneously + JoinPipelineType pipelineType() const override { return JoinPipelineType::YShaped; } + +private: + std::shared_ptr table_join; + Block right_sample_block; + Block totals; +}; + +} diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 955d8892284..18a25482b7f 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -98,10 +98,12 @@ FutureSetFromSubquery::FutureSetFromSubquery( std::unique_ptr source_, StoragePtr external_table_, FutureSetPtr external_table_set_, - const Settings & settings) + const Settings & settings, + bool in_subquery_) : external_table(std::move(external_table_)) , external_table_set(std::move(external_table_set_)) , source(std::move(source_)) + , in_subquery(in_subquery_) { set_and_key = std::make_shared(); set_and_key->key = std::move(key); @@ -261,14 +263,16 @@ FutureSetPtr PreparedSets::addFromSubquery( std::unique_ptr source, StoragePtr external_table, FutureSetPtr external_table_set, - const Settings & settings) + const Settings & settings, + bool in_subquery) { auto from_subquery = std::make_shared( toString(key, {}), std::move(source), std::move(external_table), std::move(external_table_set), - settings); + settings, + in_subquery); auto [it, inserted] = sets_from_subqueries.emplace(key, from_subquery); @@ -318,6 +322,15 @@ std::shared_ptr PreparedSets::findSubquery(const Hash & k return it->second; } +void PreparedSets::markAsINSubquery(const Hash & key) +{ + auto it = sets_from_subqueries.find(key); + if (it == sets_from_subqueries.end()) + return; + + it->second->markAsINSubquery(); +} + std::shared_ptr PreparedSets::findStorage(const Hash & key) const { auto it = sets_from_storage.find(key); @@ -327,11 +340,11 @@ std::shared_ptr PreparedSets::findStorage(const Hash & key return it->second; } -PreparedSets::Subqueries PreparedSets::getSubqueries() +PreparedSets::Subqueries PreparedSets::getSubqueries() const { PreparedSets::Subqueries res; res.reserve(sets_from_subqueries.size()); - for (auto & [_, set] : sets_from_subqueries) + for (const auto & [_, set] : sets_from_subqueries) res.push_back(set); return res; diff --git 
a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index e237789c63c..9f8bac9f71c 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -59,7 +59,7 @@ using FutureSetPtr = std::shared_ptr; class FutureSetFromStorage final : public FutureSet { public: - FutureSetFromStorage(SetPtr set_); + explicit FutureSetFromStorage(SetPtr set_); SetPtr get() const override; DataTypes getTypes() const override; @@ -97,7 +97,8 @@ public: std::unique_ptr source_, StoragePtr external_table_, FutureSetPtr external_table_set_, - const Settings & settings); + const Settings & settings, + bool in_subquery_); FutureSetFromSubquery( String key, @@ -112,6 +113,8 @@ public: QueryTreeNodePtr detachQueryTree() { return std::move(query_tree); } void setQueryPlan(std::unique_ptr source_); + void markAsINSubquery() { in_subquery = true; } + bool isINSubquery() const { return in_subquery; } private: SetAndKeyPtr set_and_key; @@ -120,6 +123,11 @@ private: std::unique_ptr source; QueryTreeNodePtr query_tree; + bool in_subquery = false; // subquery used in IN operator + // the flag can be removed after enabling new analyzer and removing interpreter + // or after enabling support IN operator with subqueries in parallel replicas + // Note: it's necessary with interpreter since prepared sets used also for GLOBAL JOINs, + // with new analyzer it's not a case }; /// Container for all the sets used in query. @@ -145,7 +153,8 @@ public: std::unique_ptr source, StoragePtr external_table, FutureSetPtr external_table_set, - const Settings & settings); + const Settings & settings, + bool in_subquery = false); FutureSetPtr addFromSubquery( const Hash & key, @@ -155,9 +164,11 @@ public: FutureSetPtr findTuple(const Hash & key, const DataTypes & types) const; std::shared_ptr findStorage(const Hash & key) const; std::shared_ptr findSubquery(const Hash & key) const; + void markAsINSubquery(const Hash & key); using Subqueries = std::vector>; - Subqueries getSubqueries(); + Subqueries getSubqueries() const; + bool hasSubqueries() const { return !sets_from_subqueries.empty(); } const SetsFromTuple & getSetsFromTuple() const { return sets_from_tuple; } // const SetsFromStorage & getSetsFromStorage() const { return sets_from_storage; } diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index 1bcec02f0c0..c07d783788a 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { @@ -126,7 +127,7 @@ void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const AS if (const auto * identifier = node->as()) data.addColumnIdentifier(*identifier); - else + else if (!node->as()) data.addColumnAliasIfAny(*node); } diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index fa289b82aaf..5f3492f0871 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -34,6 +34,7 @@ #include #include +#include namespace DB { @@ -375,7 +376,7 @@ void TableJoin::addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, boo * For `JOIN ON expr1 == expr2` we will infer common type later in makeTableJoin, * when part of plan built and types of expression will be known. 
*/ - inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage(), isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE)); + inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage()); if (auto it = left_type_map.find(col.name); it != left_type_map.end()) { @@ -558,7 +559,8 @@ TableJoin::createConvertingActions( */ NameToNameMap left_column_rename; NameToNameMap right_column_rename; - inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage(), isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE)); + + inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage()); if (!left_type_map.empty() || !right_type_map.empty()) { left_dag = applyKeyConvertToTable(left_sample_columns, left_type_map, JoinTableSide::Left, left_column_rename); @@ -612,8 +614,11 @@ TableJoin::createConvertingActions( } template -void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool strict) +void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right) { + /// FullSortingMerge and PartialMerge join algorithms don't support joining keys with different types + /// (e.g. String and LowCardinality(String)) + bool require_strict_keys_match = isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE); if (!left_type_map.empty() || !right_type_map.empty()) return; @@ -645,7 +650,7 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig const auto & ltype = ltypeit->second; const auto & rtype = rtypeit->second; - bool type_equals = strict ? ltype->equals(*rtype) : JoinCommon::typesEqualUpToNullability(ltype, rtype); + bool type_equals = require_strict_keys_match ? ltype->equals(*rtype) : JoinCommon::typesEqualUpToNullability(ltype, rtype); if (type_equals) return true; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index f97e6a74b8c..247835d9c53 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -218,7 +218,7 @@ private: /// Calculates common supertypes for corresponding join key columns. template - void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool strict); + void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right); void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); diff --git a/src/Interpreters/TreeCNFConverter.cpp b/src/Interpreters/TreeCNFConverter.cpp index 1613b09ee48..d2c7300c80c 100644 --- a/src/Interpreters/TreeCNFConverter.cpp +++ b/src/Interpreters/TreeCNFConverter.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -239,7 +240,8 @@ CNFQuery TreeCNFConverter::toCNF( if (!cnf) throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Cannot convert expression '{}' to CNF, because it produces to many clauses." 
- "Size of boolean formula in CNF can be exponential of size of source formula."); + "Size of boolean formula in CNF can be exponential of size of source formula.", + queryToString(query)); return *cnf; } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index c63aae32090..9cbf24091e3 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -73,6 +73,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNKNOWN_IDENTIFIER; + extern const int UNEXPECTED_EXPRESSION; } namespace @@ -776,6 +777,37 @@ void expandGroupByAll(ASTSelectQuery * select_query) select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list); } +void expandOrderByAll(ASTSelectQuery * select_query) +{ + auto * all_elem = select_query->orderBy()->children[0]->as(); + if (!all_elem) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not order by asts."); + + auto order_expression_list = std::make_shared(); + + for (const auto & expr : select_query->select()->children) + { + if (auto * identifier = expr->as(); identifier != nullptr) + if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + + if (auto * function = expr->as(); function != nullptr) + if (Poco::toUpper(function->alias) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + + auto elem = std::make_shared(); + elem->direction = all_elem->direction; + elem->nulls_direction = all_elem->nulls_direction; + elem->nulls_direction_was_explicitly_specified = all_elem->nulls_direction_was_explicitly_specified; + elem->children.push_back(expr); + order_expression_list->children.push_back(elem); + } + + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, order_expression_list); +} + ASTs getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) { /// There can not be aggregate functions inside the WHERE and PREWHERE. @@ -1292,6 +1324,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if (select_query->group_by_all) expandGroupByAll(select_query); + // expand ORDER BY ALL + if (settings.enable_order_by_all && select_query->order_by_all) + expandOrderByAll(select_query); + /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. 
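expandOrderByAll above (guarded by the enable_order_by_all setting) rewrites ORDER BY ALL into an explicit ORDER BY over every expression of the SELECT list, inheriting the direction and NULLS placement, and refuses to expand when something in the query is literally named all. A sketch against a hypothetical table t(a, b):

    SET enable_order_by_all = 1;
    SELECT b, a FROM t ORDER BY ALL;        -- expanded to ORDER BY b, a
    SELECT a AS all, b FROM t ORDER BY ALL; -- throws UNEXPECTED_EXPRESSION because of the 'all' alias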
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 5d8a9e0582d..9486350a0f6 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -41,12 +41,9 @@ static ZooKeeperRetriesInfo getRetriesInfo() { const auto & config_ref = Context::getGlobalContextInstance()->getConfigRef(); return ZooKeeperRetriesInfo( - "DistributedDDL", - &Poco::Logger::get("DDLQueryStatusSource"), config_ref.getInt("distributed_ddl_keeper_max_retries", 5), config_ref.getInt("distributed_ddl_keeper_initial_backoff_ms", 100), - config_ref.getInt("distributed_ddl_keeper_max_backoff_ms", 5000) - ); + config_ref.getInt("distributed_ddl_keeper_max_backoff_ms", 5000)); } bool isSupportedAlterTypeForOnClusterDDLQuery(int type) @@ -438,8 +435,8 @@ Chunk DDLQueryStatusSource::generate() Strings tmp_active_hosts; { - auto retries_info = getRetriesInfo(); - auto retries_ctl = ZooKeeperRetriesControl("executeDDLQueryOnCluster", retries_info, context->getProcessListElement()); + auto retries_ctl = ZooKeeperRetriesControl( + "executeDDLQueryOnCluster", &Poco::Logger::get("DDLQueryStatusSource"), getRetriesInfo(), context->getProcessListElement()); retries_ctl.retryLoop([&]() { auto zookeeper = context->getZooKeeper(); @@ -478,8 +475,11 @@ Chunk DDLQueryStatusSource::generate() String status_data; bool finished_exists = false; - auto retries_info = getRetriesInfo(); - auto retries_ctl = ZooKeeperRetriesControl("executeDDLQueryOnCluster", retries_info, context->getProcessListElement()); + auto retries_ctl = ZooKeeperRetriesControl( + "executeDDLQueryOnCluster", + &Poco::Logger::get("DDLQueryStatusSource"), + getRetriesInfo(), + context->getProcessListElement()); retries_ctl.retryLoop([&]() { finished_exists = context->getZooKeeper()->tryGet(fs::path(node_path) / "finished" / host_id, status_data); diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 84893011222..ed9de6a46eb 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -466,6 +466,16 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO "; rename_to->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::APPLY_DELETED_MASK) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "APPLY DELETED MASK" << (settings.hilite ? hilite_none : ""); + + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + } else throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected type of ALTER"); } diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index e601739595f..77c540aed33 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -71,6 +71,7 @@ public: DELETE, UPDATE, + APPLY_DELETED_MASK, NO_TYPE, diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 7c96db006c4..2115de1c124 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -144,7 +144,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F window()->as().formatImplMultiline(s, state, frame); } - if (orderBy()) + if (!order_by_all && orderBy()) { s.ostr << (s.hilite ? 
hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY" << (s.hilite ? hilite_none : ""); s.one_line @@ -163,6 +163,24 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F } } + if (order_by_all) + { + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY ALL" << (s.hilite ? hilite_none : ""); + + auto * elem = orderBy()->children[0]->as(); + s.ostr << (s.hilite ? hilite_keyword : "") + << (elem->direction == -1 ? " DESC" : " ASC") + << (s.hilite ? hilite_none : ""); + + if (elem->nulls_direction_was_explicitly_specified) + { + s.ostr << (s.hilite ? hilite_keyword : "") + << " NULLS " + << (elem->nulls_direction == elem->direction ? "LAST" : "FIRST") + << (s.hilite ? hilite_none : ""); + } + } + if (limitByLength()) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 57f45a8aacd..eb171dc00ee 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -87,6 +87,7 @@ public: bool group_by_with_cube = false; bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; + bool order_by_all = false; bool limit_with_ties = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index e4e8c00879e..8ca2ee0efae 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -211,6 +211,9 @@ void ASTTableJoin::formatImplBeforeTable(const FormatSettings & settings, Format case JoinKind::Comma: settings.ostr << ","; break; + case JoinKind::Paste: + settings.ostr << "PASTE JOIN"; + break; } settings.ostr << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 28b7a3de9f6..2a0060f20f2 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -111,6 +111,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_remove_ttl("REMOVE TTL"); ParserKeyword s_remove_sample_by("REMOVE SAMPLE BY"); + ParserKeyword s_apply_deleted_mask("APPLY DELETED MASK"); ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; @@ -823,6 +824,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MODIFY_COMMENT; } + else if (s_apply_deleted_mask.ignore(pos, expected)) + { + command->type = ASTAlterCommand::APPLY_DELETED_MASK; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } else return false; } diff --git a/src/Parsers/ParserOptimizeQuery.cpp b/src/Parsers/ParserOptimizeQuery.cpp index 826fbf38b36..5d3b196caf8 100644 --- a/src/Parsers/ParserOptimizeQuery.cpp +++ b/src/Parsers/ParserOptimizeQuery.cpp @@ -28,6 +28,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ParserKeyword s_partition("PARTITION"); ParserKeyword s_final("FINAL"); ParserKeyword s_deduplicate("DEDUPLICATE"); + ParserKeyword s_cleanup("CLEANUP"); ParserKeyword s_by("BY"); ParserToken s_dot(TokenType::Dot); ParserIdentifier name_p(true); @@ -76,6 +77,9 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return false; } + /// Obsolete feature, ignored for backward compatibility. 
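The comment just above refers to the CLEANUP keyword of OPTIMIZE, which the parser now accepts only for backward compatibility and otherwise ignores (the ignore() call follows right after this aside). The same batch of parser changes also adds the PASTE join kind, which combines the two inputs row by row and therefore accepts neither ON/USING nor ANY/ALL. Sketches with hypothetical table names:

    -- Parsed and accepted, but CLEANUP no longer changes anything:
    OPTIMIZE TABLE events FINAL CLEANUP;

    -- PASTE JOIN: rows are paired by position, no join keys involved:
    SELECT *
    FROM (SELECT number AS a FROM numbers(3)) AS t1
    PASTE JOIN (SELECT number * 10 AS b FROM numbers(3)) AS t2;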
+ s_cleanup.ignore(pos, expected); + auto query = std::make_shared(); node = query; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 341c1ef60b4..641e74b5f18 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -287,6 +288,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) interpolate_expression_list = std::make_shared(); } } + else if (order_expression_list->children.size() == 1) + { + /// ORDER BY ALL + auto * identifier = order_expression_list->children[0]->as()->children[0]->as(); + if (identifier != nullptr && Poco::toUpper(identifier->name()) == "ALL") + select_query->order_by_all = true; + } } /// This is needed for TOP expression, because it can also use WITH TIES. diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index b3ae6ca0bb9..ca209739dad 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -166,6 +167,8 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec table_join->kind = JoinKind::Full; else if (ParserKeyword("CROSS").ignore(pos)) table_join->kind = JoinKind::Cross; + else if (ParserKeyword("PASTE").ignore(pos)) + table_join->kind = JoinKind::Paste; else no_kind = true; @@ -191,8 +194,8 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec } if (table_join->strictness != JoinStrictness::Unspecified - && table_join->kind == JoinKind::Cross) - throw Exception(ErrorCodes::SYNTAX_ERROR, "You must not specify ANY or ALL for CROSS JOIN."); + && (table_join->kind == JoinKind::Cross || table_join->kind == JoinKind::Paste)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "You must not specify ANY or ALL for {} JOIN.", toString(table_join->kind)); if ((table_join->strictness == JoinStrictness::Semi || table_join->strictness == JoinStrictness::Anti) && (table_join->kind != JoinKind::Left && table_join->kind != JoinKind::Right)) @@ -206,7 +209,7 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec return false; if (table_join->kind != JoinKind::Comma - && table_join->kind != JoinKind::Cross) + && table_join->kind != JoinKind::Cross && table_join->kind != JoinKind::Paste) { if (ParserKeyword("USING").ignore(pos, expected)) { diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index a6806a628bf..8012dbb37c6 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -1095,7 +1095,11 @@ void obfuscateIdentifier(std::string_view src, WriteBuffer & result, WordMap & o } -void obfuscateLiteral(std::string_view src, WriteBuffer & result, SipHash hash_func) +void obfuscateLiteral( + std::string_view src, + WriteBuffer & result, + SipHash hash_func, + KnownIdentifierFunc known_identifier_func) { const char * src_pos = src.data(); const char * src_end = src_pos + src.size(); @@ -1208,15 +1212,15 @@ void obfuscateLiteral(std::string_view src, WriteBuffer & result, SipHash hash_f } else if (isAlphaASCII(src_pos[0])) { - /// Alphabetial characters + /// Alphabetical characters const char * alpha_end = src_pos + 1; while (alpha_end < src_end && isAlphaASCII(*alpha_end)) ++alpha_end; - String wordcopy(src_pos, alpha_end); - Poco::toUpperInPlace(wordcopy); - if (keep_words.contains(wordcopy)) + String word(src_pos, 
alpha_end); + String wordcopy = Poco::toUpper(word); + if (keep_words.contains(wordcopy) || known_identifier_func(word)) { result.write(src_pos, alpha_end - src_pos); src_pos = alpha_end; @@ -1337,14 +1341,14 @@ void obfuscateQueries( } else if (token.type == TokenType::Number) { - obfuscateLiteral(whole_token, result, hash_func); + obfuscateLiteral(whole_token, result, hash_func, known_identifier_func); } else if (token.type == TokenType::StringLiteral) { assert(token.size() >= 2); result.write(*token.begin); - obfuscateLiteral({token.begin + 1, token.size() - 2}, result, hash_func); + obfuscateLiteral({token.begin + 1, token.size() - 2}, result, hash_func, known_identifier_func); result.write(token.end[-1]); } else if (token.type == TokenType::Comment) @@ -1360,4 +1364,3 @@ void obfuscateQueries( } } - diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2ab88491357..74ae3028928 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -915,6 +915,7 @@ void addWindowSteps(QueryPlan & query_plan, auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), window_description.full_sort_description, + window_description.partition_by, 0 /*limit*/, sort_settings, settings.optimize_sorting_by_input_stream_properties); @@ -1332,15 +1333,28 @@ void Planner::buildPlanForQueryNode() } collectSets(query_tree, *planner_context); + + const auto & settings = query_context->getSettingsRef(); + if (query_context->canUseTaskBasedParallelReplicas()) + { + if (planner_context->getPreparedSets().hasSubqueries()) + { + if (settings.allow_experimental_parallel_reading_from_replicas == 2) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); + + auto & mutable_context = planner_context->getMutableQueryContext(); + mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + LOG_DEBUG(&Poco::Logger::get("Planner"), "Disabling parallel replicas to execute a query with IN with subquery"); + } + } + collectTableExpressionData(query_tree, planner_context); checkStoragesSupportTransactions(planner_context); if (!select_query_options.only_analyze) collectFiltersForAnalysis(query_tree, planner_context); - const auto & settings = query_context->getSettingsRef(); - - if (settings.allow_experimental_parallel_reading_from_replicas > 0) + if (query_context->canUseTaskBasedParallelReplicas()) { const auto & table_expression_nodes = planner_context->getTableExpressionNodeToData(); for (const auto & it : table_expression_nodes) @@ -1366,7 +1380,7 @@ void Planner::buildPlanForQueryNode() } } - if (settings.allow_experimental_parallel_reading_from_replicas > 0 || !settings.parallel_replicas_custom_key.value.empty()) + if (query_context->canUseTaskBasedParallelReplicas() || !settings.parallel_replicas_custom_key.value.empty()) { /// Check support for JOIN for parallel replicas with custom key if (planner_context->getTableExpressionNodeToData().size() > 1) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index abcf971b832..e2cdf146a69 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -955,6 +955,29 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres }; } +void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextPtr & planner_context, const FunctionOverloadResolverPtr & to_nullable_function) +{ + auto cast_actions_dag = 
std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & output_node : cast_actions_dag->getOutputs()) + { + if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output_node->result_name)) + { + DataTypePtr type_to_check = output_node->result_type; + if (const auto * type_to_check_low_cardinality = typeid_cast(type_to_check.get())) + type_to_check = type_to_check_low_cardinality->getDictionaryType(); + + if (type_to_check->canBeInsideNullable()) + output_node = &cast_actions_dag->addFunction(to_nullable_function, {output_node}, output_node->result_name); + } + } + + cast_actions_dag->projectInput(); + auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); + cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); + plan_to_add_cast.addStep(std::move(cast_join_columns_step)); +} + JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression, JoinTreeQueryPlan left_join_tree_query_plan, JoinTreeQueryPlan right_join_tree_query_plan, @@ -1068,45 +1091,21 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); - auto to_nullable_function = FunctionFactory::instance().get("toNullable", query_context); - - auto join_cast_plan_columns_to_nullable = [&](QueryPlan & plan_to_add_cast) - { - auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); - - for (auto & output_node : cast_actions_dag->getOutputs()) - { - if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output_node->result_name)) - { - DataTypePtr type_to_check = output_node->result_type; - if (const auto * type_to_check_low_cardinality = typeid_cast(type_to_check.get())) - type_to_check = type_to_check_low_cardinality->getDictionaryType(); - - if (type_to_check->canBeInsideNullable()) - output_node = &cast_actions_dag->addFunction(to_nullable_function, {output_node}, output_node->result_name); - } - } - - cast_actions_dag->projectInput(); - auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); - cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); - plan_to_add_cast.addStep(std::move(cast_join_columns_step)); - }; - if (settings.join_use_nulls) { + auto to_nullable_function = FunctionFactory::instance().get("toNullable", query_context); if (isFull(join_kind)) { - join_cast_plan_columns_to_nullable(left_plan); - join_cast_plan_columns_to_nullable(right_plan); + joinCastPlanColumnsToNullable(left_plan, planner_context, to_nullable_function); + joinCastPlanColumnsToNullable(right_plan, planner_context, to_nullable_function); } else if (isLeft(join_kind)) { - join_cast_plan_columns_to_nullable(right_plan); + joinCastPlanColumnsToNullable(right_plan, planner_context, to_nullable_function); } else if (isRight(join_kind)) { - join_cast_plan_columns_to_nullable(left_plan); + joinCastPlanColumnsToNullable(left_plan, planner_context, to_nullable_function); } } @@ -1312,7 +1311,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ return step_raw_ptr; }; - if (join_algorithm->pipelineType() == JoinPipelineType::YShaped) + if (join_algorithm->pipelineType() == JoinPipelineType::YShaped && join_kind != JoinKind::Paste) { const auto & 
join_clause = table_join->getOnlyClause(); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 5fda2a8617d..5e9de4dedcf 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -653,6 +654,8 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, const Block & right_table_expression_header, const PlannerContextPtr & planner_context) { + if (table_join->kind() == JoinKind::Paste) + return std::make_shared(table_join, right_table_expression_header); /// Direct JOIN with special storages that support key value access. For example JOIN with Dictionary if (algorithm == JoinAlgorithm::DIRECT || algorithm == JoinAlgorithm::DEFAULT) { diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index ee5a05fe7da..9f963dc182a 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -63,7 +63,7 @@ public: void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) { if (hasColumn(column.name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column with name {} already exists"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column with name {} already exists", column.name); addColumnImpl(column, column_identifier); } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index b4d2605e871..9841b5e70c6 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -1289,7 +1289,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) case avro::Type::AVRO_MAP: return std::make_shared(avroNodeToDataType(node->leafAt(0)), avroNodeToDataType(node->leafAt(1))); default: - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro column {} is not supported for inserting."); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro column {} is not supported for inserting.", nodeName(node)); } } diff --git a/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp b/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp index 19ed5c94dfd..4c3bb219415 100644 --- a/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp @@ -308,6 +308,41 @@ static inline void throwIfError(llvm::Error & e, const char * what) throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, "Failed to parse {}: {}", what, llvm::toString(std::move(e))); } +llvm::DWARFFormValue DWARFBlockInputFormat::parseAttribute( + const llvm::DWARFAbbreviationDeclaration::AttributeSpec & attr, uint64_t * offset, + const UnitState & unit) const +{ + auto val = llvm::DWARFFormValue::createFromSValue( + attr.Form, attr.isImplicitConst() ? 
attr.getImplicitConstValue() : 0); + if (!val.extractValue(*extractor, offset, unit.dwarf_unit->getFormParams(), unit.dwarf_unit)) + throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, + "Failed to parse attribute {} of form {} at offset {}", + llvm::dwarf::AttributeString(attr.Attr), attr.Form, *offset); + return val; +} + +void DWARFBlockInputFormat::skipAttribute( + const llvm::DWARFAbbreviationDeclaration::AttributeSpec & attr, uint64_t * offset, + const UnitState & unit) const +{ + if (!llvm::DWARFFormValue::skipValue( + attr.Form, *extractor, offset, unit.dwarf_unit->getFormParams())) + throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, + "Failed to skip attribute {} of form {} at offset {}", + llvm::dwarf::AttributeString(attr.Attr), attr.Form, *offset); +} + +uint64_t DWARFBlockInputFormat::parseAddress(llvm::dwarf::Attribute attr, const llvm::DWARFFormValue & val, const UnitState & unit) +{ + if (val.getForm() == llvm::dwarf::DW_FORM_addr) + return val.getRawUValue(); + if (val.getForm() == llvm::dwarf::DW_FORM_addrx || + (val.getForm() >= llvm::dwarf::DW_FORM_addrx1 && + val.getForm() <= llvm::dwarf::DW_FORM_addrx4)) + return fetchFromDebugAddr(unit.debug_addr_base, val.getRawUValue()); + throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, "Form {} for {} is not supported", llvm::dwarf::FormEncodingString(val.getForm()), llvm::dwarf::AttributeString(attr)); +} + Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) { const auto & header = getPort().getHeader(); @@ -315,7 +350,6 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) std::array need{}; for (const std::string & name : header.getNames()) need[column_name_to_idx.at(name)] = true; - auto form_params = unit.dwarf_unit->getFormParams(); /// For parallel arrays, we nominate one of them to be responsible for populating the offsets vector. need[COL_ATTR_NAME] = need[COL_ATTR_NAME] || need[COL_ATTR_FORM] || need[COL_ATTR_INT] || need[COL_ATTR_STR]; @@ -390,6 +424,34 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) if (need[COL_TAG]) col_tag->insertValue(tag); + if (tag == llvm::dwarf::DW_TAG_compile_unit) + { + /// Pre-parse DW_AT_addr_base and DW_AT_rnglists_base because other attributes may + /// rely on them. (Why couldn't DWARF just promise that these attributes must appear + /// before any attributes that depend on them?) + uint64_t offset = unit.offset; + std::optional low_pc; + for (auto attr : abbrev->attributes()) + { + if (attr.Attr == llvm::dwarf::DW_AT_addr_base || + attr.Attr == llvm::dwarf::DW_AT_rnglists_base) + { + auto val = parseAttribute(attr, &offset, unit); + if (attr.Attr == llvm::dwarf::DW_AT_addr_base) + unit.debug_addr_base = val.getRawUValue(); + else + unit.rnglists_base = val.getRawUValue(); + } + else if (attr.Attr == llvm::dwarf::DW_AT_low_pc) + low_pc = parseAttribute(attr, &offset, unit); + else + skipAttribute(attr, &offset, unit); + } + /// May use addr_base. + if (low_pc.has_value()) + unit.base_address = parseAddress(llvm::dwarf::DW_AT_low_pc, *low_pc, unit); + } + bool need_name = need[COL_NAME]; bool need_linkage_name = need[COL_LINKAGE_NAME]; bool need_decl_file = need[COL_DECL_FILE]; @@ -410,11 +472,7 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) for (auto attr : abbrev->attributes()) { - auto val = llvm::DWARFFormValue::createFromSValue(attr.Form, attr.isImplicitConst() ? attr.getImplicitConstValue() : 0); - /// This is relatively slow, maybe we should reimplement it. 
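The helpers introduced above (parseAttribute, skipAttribute, parseAddress, plus pre-parsing DW_AT_addr_base, DW_AT_rnglists_base and the unit base address from the compile-unit DIE) all feed the DWARF input format. A hypothetical way to exercise that code path, assuming a local binary with debug info and that name, decl_file and decl_line are among the format's columns:

    SELECT name, decl_file, decl_line
    FROM file('./clickhouse', 'DWARF')
    LIMIT 5;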
- if (!val.extractValue(*extractor, &unit.offset, form_params, unit.dwarf_unit)) - throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, "Failed to parse attribute {} of form {} at offset {}", - llvm::dwarf::AttributeString(attr.Attr), attr.Form, unit.offset); + auto val = parseAttribute(attr, &unit.offset, unit); if (need[COL_ATTR_NAME]) col_attr_name->insertValue(attr.Attr); @@ -452,13 +510,6 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) if (attr.Attr == llvm::dwarf::DW_AT_decl_line && std::exchange(need_decl_line, false)) col_decl_line->insertValue(static_cast(val.getRawUValue())); - /// Starting offset of this unit's data in .debug_addr section. - if (attr.Attr == llvm::dwarf::DW_AT_addr_base) - unit.addr_base = val.getRawUValue(); - /// Same for .debug_rnglists section. - if (attr.Attr == llvm::dwarf::DW_AT_rnglists_base) - unit.rnglists_base = val.getRawUValue(); - if (attr.Attr == llvm::dwarf::DW_AT_high_pc) { high_pc = val.getRawUValue(); @@ -515,16 +566,7 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) if (need_ranges && (attr.Attr == llvm::dwarf::DW_AT_low_pc || attr.Attr == llvm::dwarf::DW_AT_high_pc)) { - UInt64 addr; - if (val.getForm() == llvm::dwarf::DW_FORM_addr) - addr = val.getRawUValue(); - else if (val.getForm() == llvm::dwarf::DW_FORM_addrx || - (val.getForm() >= llvm::dwarf::DW_FORM_addrx1 && - val.getForm() <= llvm::dwarf::DW_FORM_addrx4)) - addr = fetchFromDebugAddr(unit.addr_base, val.getRawUValue()); - else - throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, "Form {} for {} is not supported", llvm::dwarf::FormEncodingString(val.getForm()), llvm::dwarf::AttributeString(attr.Attr)); - + UInt64 addr = parseAddress(attr.Attr, val, unit); if (attr.Attr == llvm::dwarf::DW_AT_low_pc) low_pc = addr; else @@ -618,7 +660,7 @@ Chunk DWARFBlockInputFormat::parseEntries(UnitState & unit) if (need_ranges) { if (ranges.has_value()) - parseRanges(*ranges, ranges_rnglistx, low_pc, unit, col_ranges_start, col_ranges_end); + parseRanges(*ranges, ranges_rnglistx, unit, col_ranges_start, col_ranges_end); else if (low_pc.has_value()) { UInt64 high; @@ -740,7 +782,7 @@ void DWARFBlockInputFormat::parseFilenameTable(UnitState & unit, uint64_t offset auto error = prologue.parse(*debug_line_extractor, &offset, /*RecoverableErrorHandler*/ [&](auto e) { if (++seen_debug_line_warnings < 10) - LOG_INFO(&Poco::Logger::get("DWARF"), "{}", llvm::toString(std::move(e))); + LOG_INFO(&Poco::Logger::get("DWARF"), "Parsing error: {}", llvm::toString(std::move(e))); }, *dwarf_context, unit.dwarf_unit); if (error) @@ -783,12 +825,12 @@ uint64_t DWARFBlockInputFormat::fetchFromDebugAddr(uint64_t addr_base, uint64_t } void DWARFBlockInputFormat::parseRanges( - uint64_t offset, bool form_rnglistx, std::optional low_pc, const UnitState & unit, const ColumnVector::MutablePtr & col_ranges_start, + uint64_t offset, bool form_rnglistx, const UnitState & unit, const ColumnVector::MutablePtr & col_ranges_start, const ColumnVector::MutablePtr & col_ranges_end) const { llvm::Optional base_addr; - if (low_pc.has_value()) - base_addr = llvm::object::SectionedAddress{.Address = *low_pc}; + if (unit.base_address != UINT64_MAX) + base_addr = llvm::object::SectionedAddress{.Address = unit.base_address}; llvm::DWARFAddressRangesVector ranges; @@ -833,7 +875,7 @@ void DWARFBlockInputFormat::parseRanges( auto lookup_addr = [&](uint32_t idx) -> llvm::Optional { - uint64_t addr = fetchFromDebugAddr(unit.addr_base, idx); + uint64_t addr = fetchFromDebugAddr(unit.debug_addr_base, idx); return 
llvm::object::SectionedAddress{.Address = addr}; }; ranges = list.getAbsoluteRanges(base_addr, /*AddressByteSize*/ 8, lookup_addr); diff --git a/src/Processors/Formats/Impl/DWARFBlockInputFormat.h b/src/Processors/Formats/Impl/DWARFBlockInputFormat.h index 07c00656e4d..e1409dd3373 100644 --- a/src/Processors/Formats/Impl/DWARFBlockInputFormat.h +++ b/src/Processors/Formats/Impl/DWARFBlockInputFormat.h @@ -53,8 +53,11 @@ private: std::string unit_name; ColumnPtr filename_table; // from .debug_line size_t filename_table_size = 0; - uint64_t addr_base = UINT64_MAX; + /// Starting offset of this unit's data in .debug_addr and .debug_rnglists sections. + uint64_t debug_addr_base = UINT64_MAX; uint64_t rnglists_base = UINT64_MAX; + /// "Base address" for parsing range lists. Not to be confused with "addr base". + uint64_t base_address = UINT64_MAX; uint64_t offset = 0; std::vector stack; @@ -102,11 +105,18 @@ private: void parseFilenameTable(UnitState & unit, uint64_t offset); Chunk parseEntries(UnitState & unit); + llvm::DWARFFormValue parseAttribute( + const llvm::DWARFAbbreviationDeclaration::AttributeSpec & attr, uint64_t * offset, + const UnitState & unit) const; + void skipAttribute( + const llvm::DWARFAbbreviationDeclaration::AttributeSpec & attr, uint64_t * offset, + const UnitState & unit) const; + uint64_t parseAddress(llvm::dwarf::Attribute attr, const llvm::DWARFFormValue & val, const UnitState & unit); /// Parse .debug_addr entry. uint64_t fetchFromDebugAddr(uint64_t addr_base, uint64_t idx) const; /// Parse .debug_ranges (DWARF4) or .debug_rnglists (DWARF5) entry. void parseRanges( - uint64_t offset, bool form_rnglistx, std::optional low_pc, const UnitState & unit, + uint64_t offset, bool form_rnglistx, const UnitState & unit, const ColumnVector::MutablePtr & col_ranges_start, const ColumnVector::MutablePtr & col_ranges_end) const; }; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index f6bd4f51289..6db0ee61e32 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -91,7 +91,7 @@ void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) bool quotes = false; if (*buf->position() != '{') - throw Exception(ErrorCodes::INCORRECT_DATA, "JSON object must begin with '{'."); + throw Exception(ErrorCodes::INCORRECT_DATA, "JSON object must begin with '{{'."); ++buf->position(); ++balance; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index b301b9527c1..08dc9e2d511 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -284,23 +284,4 @@ void registerJSONCompactEachRowSchemaReader(FormatFactory & factory) } } -void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory) -{ - auto register_func = [&](const String & format_name, bool with_names, bool with_types) - { - /// In case when we have names and/or types in the first two/one rows, - /// we need to read at least one more row of actual data. So, set - /// the minimum of rows for segmentation engine according to - /// parameters with_names and with_types. 
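A note on the DWARF address handling consolidated above: DW_FORM_addr carries the address inline, while the DW_FORM_addrx* forms carry an index into the unit's slice of .debug_addr that starts at DW_AT_addr_base, which is why that attribute is now pre-parsed for the compile unit. The following is a minimal standalone sketch of that indexed lookup, assuming 8-byte little-endian addresses (the diff itself passes AddressByteSize = 8); it is an illustration, not the actual fetchFromDebugAddr implementation.

#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <vector>

// Resolve a DW_FORM_addrx-style index: the address lives in .debug_addr at
// addr_base + index * address_size (address_size assumed to be 8 here).
uint64_t resolveIndexedAddress(const std::vector<uint8_t> & debug_addr,
                               uint64_t addr_base, uint64_t index)
{
    const uint64_t offset = addr_base + index * 8;
    if (offset + 8 > debug_addr.size())
        throw std::runtime_error("DW_FORM_addrx index points outside .debug_addr");
    uint64_t address = 0;
    std::memcpy(&address, debug_addr.data() + offset, 8); // little-endian host assumed
    return address;
}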
- size_t min_rows = 1 + int(with_names) + int(with_types); - factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) - { - return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_bytes, min_rows, max_rows); - }); - }; - - registerWithNamesAndTypes("JSONCompactEachRow", register_func); - registerWithNamesAndTypes("JSONCompactStringsEachRow", register_func); -} - } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 875b0d9bdbc..6b7ca59f921 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1042,6 +1042,38 @@ static void addMergingFinal( pipe.addTransform(get_merging_processor()); } +bool ReadFromMergeTree::doNotMergePartsAcrossPartitionsFinal() const +{ + const auto & settings = context->getSettingsRef(); + + /// If setting do_not_merge_across_partitions_select_final is set always prefer it + if (settings.do_not_merge_across_partitions_select_final.changed) + return settings.do_not_merge_across_partitions_select_final; + + if (!metadata_for_reading->hasPrimaryKey() || !metadata_for_reading->hasPartitionKey()) + return false; + + /** To avoid merging parts across partitions we want result of partition key expression for + * rows with same primary key to be the same. + * + * If partition key expression is deterministic, and contains only columns that are included + * in primary key, then for same primary key column values, result of partition key expression + * will be the same. + */ + const auto & partition_key_expression = metadata_for_reading->getPartitionKey().expression; + if (partition_key_expression->getActionsDAG().hasNonDeterministic()) + return false; + + const auto & primary_key_columns = metadata_for_reading->getPrimaryKey().column_names; + NameSet primary_key_columns_set(primary_key_columns.begin(), primary_key_columns.end()); + + const auto & partition_key_required_columns = partition_key_expression->getRequiredColumns(); + for (const auto & partition_key_required_column : partition_key_required_columns) + if (!primary_key_columns_set.contains(partition_key_required_column)) + return false; + + return true; +} Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection) @@ -1064,7 +1096,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( auto it = parts_with_ranges.begin(); parts_to_merge_ranges.push_back(it); - if (settings.do_not_merge_across_partitions_select_final) + bool do_not_merge_across_partitions_select_final = doNotMergePartsAcrossPartitionsFinal(); + if (do_not_merge_across_partitions_select_final) { while (it != parts_with_ranges.end()) { @@ -1097,7 +1130,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition /// with level > 0 then we won't post-process this part, and if num_streams > 1 we /// can use parallel select on such parts. 
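The doNotMergePartsAcrossPartitionsFinal heuristic added above boils down to a containment check: FINAL may avoid merging across partitions when the partition key expression is deterministic and uses only primary-key columns, because then rows with equal primary keys always land in the same partition; an explicitly changed do_not_merge_across_partitions_select_final setting still takes precedence. A compact standalone restatement of that check with illustrative types (not the actual ClickHouse metadata interfaces):

#include <string>
#include <unordered_set>
#include <vector>

// Returns true when rows with equal primary keys are guaranteed to share a
// partition: the partition key must be deterministic and depend only on
// columns that are part of the primary key.
bool canSkipMergeAcrossPartitions(bool partition_key_is_deterministic,
                                  const std::vector<std::string> & partition_key_columns,
                                  const std::vector<std::string> & primary_key_columns)
{
    if (!partition_key_is_deterministic)
        return false;
    std::unordered_set<std::string> pk(primary_key_columns.begin(), primary_key_columns.end());
    for (const auto & column : partition_key_columns)
        if (!pk.contains(column))
            return false;
    return true;
}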
- bool no_merging_final = settings.do_not_merge_across_partitions_select_final && + bool no_merging_final = do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && parts_to_merge_ranges[range_index]->data_part->info.level > 0; Pipes pipes; @@ -1338,7 +1371,7 @@ static void buildIndexes( context, primary_key_column_names, primary_key.expression, - array_join_name_set}, {}, {}, {}, false, {}}); + array_join_name_set}, {}, {}, {}, {}, false, {}}); } else { @@ -1346,7 +1379,7 @@ static void buildIndexes( query_info, context, primary_key_column_names, - primary_key.expression}, {}, {}, {}, false, {}}); + primary_key.expression}, {}, {}, {}, {}, false, {}}); } if (metadata_snapshot->hasPartitionKey()) @@ -1365,6 +1398,8 @@ static void buildIndexes( else indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, query_info.query, context); + MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context); + indexes->use_skip_indexes = settings.use_skip_indexes; bool final = query_info.isFinal(); @@ -1549,6 +1584,9 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( } LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); + if (indexes->part_offset_condition) + LOG_DEBUG(log, "Part offset condition: {}", indexes->part_offset_condition->toString()); + if (indexes->key_condition.alwaysFalse()) return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); @@ -1595,6 +1633,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( metadata_snapshot, context, indexes->key_condition, + indexes->part_offset_condition, indexes->skip_indexes, reader_settings, log, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 4e38e06c6af..e2c38ebb251 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -151,6 +151,7 @@ public: KeyCondition key_condition; std::optional partition_pruner; std::optional minmax_idx_condition; + std::optional part_offset_condition; UsefulSkipIndexes skip_indexes; bool use_skip_indexes; std::optional> part_values; @@ -288,6 +289,8 @@ private: ActionsDAGPtr & out_projection, const InputOrderInfoPtr & input_order_info); + bool doNotMergePartsAcrossPartitionsFinal() const; + Pipe spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index cd88f5cc93a..0d1fae0d239 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -236,7 +236,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared(), "_shard_num"}}; - if (context->canUseParallelReplicas()) + if (context->canUseTaskBasedParallelReplicas()) { if (context->getSettingsRef().cluster_for_parallel_replicas.changed) { @@ -258,7 +258,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); 
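For context on the part_offset_condition added to ReadFromMergeTree above: a filter over the _part_offset virtual column is turned into a key-condition-like check and carried alongside the primary-key condition into range selection. The sketch below is purely conceptual (hypothetical helper, not the MergeTreeDataSelectExecutor API) and only illustrates how such a condition could prune a mark range by row offsets.

#include <cstdint>

// A mark range covers rows [begin_row, end_row) of a part. If the query
// constrains _part_offset to [min_offset, max_offset], ranges lying entirely
// outside that window cannot contain matching rows.
struct OffsetInterval { uint64_t min_offset; uint64_t max_offset; };

bool markRangeMayContainMatches(const OffsetInterval & condition,
                                uint64_t begin_row, uint64_t end_row)
{
    return end_row > condition.min_offset && begin_row <= condition.max_offset;
}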
remote_query_executor->setLogger(log); - if (context->canUseParallelReplicas()) + if (context->canUseTaskBasedParallelReplicas()) { // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. @@ -367,7 +367,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder IConnections::ReplicaInfo replica_info { .all_replicas_count = all_replicas_count, - .number_of_current_replica = 0 + /// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`). + /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. + .number_of_current_replica = shard.shard_num - 1, }; addPipeForSingeReplica(pipes, shard.pool, replica_info); @@ -386,7 +388,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder IConnections::ReplicaInfo replica_info { .all_replicas_count = all_replicas_count, - .number_of_current_replica = pipes.size() + /// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`). + /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. + .number_of_current_replica = current_shard->shard_num - 1, }; addPipeForSingeReplica(pipes, current_shard->pool, replica_info); diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 55ce763575e..641b9036d4c 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -9,6 +10,8 @@ #include #include +#include +#include namespace CurrentMetrics { @@ -76,6 +79,21 @@ SortingStep::SortingStep( output_stream->sort_scope = DataStream::SortScope::Global; } +SortingStep::SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + const SortDescription & partition_by_description_, + UInt64 limit_, + const Settings & settings_, + bool optimize_sorting_by_input_stream_properties_) + : SortingStep(input_stream, description_, limit_, settings_, optimize_sorting_by_input_stream_properties_) +{ + partition_by_description = partition_by_description_; + + output_stream->sort_description = result_description; + output_stream->sort_scope = DataStream::SortScope::Stream; +} + SortingStep::SortingStep( const DataStream & input_stream_, SortDescription prefix_description_, @@ -117,7 +135,11 @@ void SortingStep::updateOutputStream() { output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); output_stream->sort_description = result_description; - output_stream->sort_scope = DataStream::SortScope::Global; + + if (partition_by_description.empty()) + output_stream->sort_scope = DataStream::SortScope::Global; + else + output_stream->sort_scope = DataStream::SortScope::Stream; } void SortingStep::updateLimit(size_t limit_) @@ -135,6 +157,55 @@ void SortingStep::convertToFinishSorting(SortDescription prefix_description_) prefix_description = std::move(prefix_description_); } +void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) +{ + size_t threads = pipeline.getNumThreads(); + size_t streams = pipeline.getNumStreams(); + + if (!partition_by_description.empty() 
&& threads > 1) + { + Block stream_header = pipeline.getHeader(); + + ColumnNumbers key_columns; + key_columns.reserve(partition_by_description.size()); + for (auto & col : partition_by_description) + { + key_columns.push_back(stream_header.getPositionByName(col.column_name)); + } + + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (auto * port : ports) + { + auto scatter = std::make_shared(stream_header, threads, key_columns); + connect(*port, scatter->getInputs().front()); + processors.push_back(scatter); + } + return processors; + }); + + if (streams > 1) + { + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (size_t i = 0; i < threads; ++i) + { + size_t output_it = i; + auto resize = std::make_shared(stream_header, streams, 1); + auto & inputs = resize->getInputs(); + + for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it) + connect(*ports[output_it], *input_it); + processors.push_back(resize); + } + return processors; + }); + } + } +} + void SortingStep::finishSorting( QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, const UInt64 limit_) { @@ -260,10 +331,12 @@ void SortingStep::fullSortStreams( void SortingStep::fullSort( QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, const UInt64 limit_, const bool skip_partial_sort) { + scatterByPartitionIfNeeded(pipeline); + fullSortStreams(pipeline, sort_settings, result_sort_desc, limit_, skip_partial_sort); /// If there are several streams, then we merge them into one - if (pipeline.getNumStreams() > 1) + if (pipeline.getNumStreams() > 1 && (partition_by_description.empty() || pipeline.getNumThreads() == 1)) { auto transform = std::make_shared( pipeline.getHeader(), @@ -295,6 +368,7 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build { bool need_finish_sorting = (prefix_description.size() < result_description.size()); mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 
0 : limit)); + if (need_finish_sorting) { finishSorting(pipeline, prefix_description, result_description, limit); diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 371a24ac6f2..52f48f66a32 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -40,6 +40,15 @@ public: const Settings & settings_, bool optimize_sorting_by_input_stream_properties_); + /// Full with partitioning + SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + const SortDescription & partition_by_description_, + UInt64 limit_, + const Settings & settings_, + bool optimize_sorting_by_input_stream_properties_); + /// FinishSorting SortingStep( const DataStream & input_stream_, @@ -83,14 +92,24 @@ public: bool skip_partial_sort = false); private: + void scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline); void updateOutputStream() override; - static void - mergeSorting(QueryPipelineBuilder & pipeline, const Settings & sort_settings, const SortDescription & result_sort_desc, UInt64 limit_); + static void mergeSorting( + QueryPipelineBuilder & pipeline, + const Settings & sort_settings, + const SortDescription & result_sort_desc, + UInt64 limit_); - void mergingSorted(QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, UInt64 limit_); + void mergingSorted( + QueryPipelineBuilder & pipeline, + const SortDescription & result_sort_desc, + UInt64 limit_); void finishSorting( - QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, UInt64 limit_); + QueryPipelineBuilder & pipeline, + const SortDescription & input_sort_desc, + const SortDescription & result_sort_desc, + UInt64 limit_); void fullSort( QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, @@ -101,6 +120,9 @@ private: SortDescription prefix_description; const SortDescription result_description; + + SortDescription partition_by_description; + UInt64 limit; bool always_read_till_end = false; diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index 9c68a4b73d1..bb4f429d626 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -67,7 +67,8 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ // This resize is needed for cases such as `over ()` when we don't have a // sort node, and the input might have multiple streams. The sort node would // have resized it. - pipeline.resize(1); + if (window_description.full_sort_description.empty()) + pipeline.resize(1); pipeline.addSimpleTransform( [&](const Block & /*header*/) diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 355271e0c05..ecf8163a9d9 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -377,9 +377,7 @@ private: auto & output = outputs.front(); auto chunk = std::move(single_level_chunks.back()); single_level_chunks.pop_back(); - const auto has_rows = chunk.hasRows(); - if (has_rows) - output.push(std::move(chunk)); + output.push(std::move(chunk)); if (finished && single_level_chunks.empty()) { @@ -387,7 +385,7 @@ private: return Status::Finished; } - return has_rows ? Status::PortFull : Status::Ready; + return Status::PortFull; } /// Read all sources and try to push current bucket. 
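The SortingStep changes above (scatterByPartitionIfNeeded plus the adjusted fullSort) sort each scattered stream independently when a PARTITION BY is present, since ordering is only required within a partition; the final MergingSortedTransform is then needed only when a globally ordered result is expected. A one-line restatement of that decision with illustrative names:

#include <cstddef>

// A global merge of per-stream sorted outputs is required only when several
// streams exist and there is no partition scatter to rely on (a
// single-threaded pipeline also falls back to the global merge).
bool needsGlobalMerge(size_t num_streams, size_t num_threads, bool has_partition_by)
{
    return num_streams > 1 && (!has_partition_by || num_threads == 1);
}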
@@ -466,7 +464,8 @@ private: auto block = params->aggregator.prepareBlockAndFillWithoutKey( *first, params->final, first->type != AggregatedDataVariants::Type::without_key); - single_level_chunks.emplace_back(convertToChunk(block)); + if (block.rows() > 0) + single_level_chunks.emplace_back(convertToChunk(block)); } } @@ -493,7 +492,8 @@ private: auto blocks = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final); for (auto & block : blocks) - single_level_chunks.emplace_back(convertToChunk(block)); + if (block.rows() > 0) + single_level_chunks.emplace_back(convertToChunk(block)); finished = true; data.reset(); diff --git a/src/Processors/Transforms/PasteJoinTransform.cpp b/src/Processors/Transforms/PasteJoinTransform.cpp new file mode 100644 index 00000000000..ff3e2fb85e5 --- /dev/null +++ b/src/Processors/Transforms/PasteJoinTransform.cpp @@ -0,0 +1,127 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + + +PasteJoinAlgorithm::PasteJoinAlgorithm( + JoinPtr table_join_, + const Blocks & input_headers, + size_t max_block_size_) + : table_join(table_join_) + , max_block_size(max_block_size_) + , log(&Poco::Logger::get("PasteJoinAlgorithm")) +{ + if (input_headers.size() != 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, "PasteJoinAlgorithm requires exactly two inputs"); + + auto strictness = table_join->getTableJoin().strictness(); + if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PasteJoinAlgorithm is not implemented for strictness {}", strictness); + + auto kind = table_join->getTableJoin().kind(); + if (!isPaste(kind)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PasteJoinAlgorithm is not implemented for kind {}", kind); +} + +static void prepareChunk(Chunk & chunk) +{ + if (!chunk) + return; + + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + chunk.setColumns(std::move(columns), num_rows); +} + +void PasteJoinAlgorithm::initialize(Inputs inputs) +{ + if (inputs.size() != 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Two inputs are required, got {}", inputs.size()); + + for (size_t i = 0; i < inputs.size(); ++i) + { + consume(inputs[i], i); + } +} + +void PasteJoinAlgorithm::consume(Input & input, size_t source_num) +{ + if (input.skip_last_row) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "skip_last_row is not supported"); + + if (input.permutation) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "permutation is not supported"); + + last_used_row[source_num] = 0; + + prepareChunk(input.chunk); + chunks[source_num] = std::move(input.chunk); +} + +IMergingAlgorithm::Status PasteJoinAlgorithm::merge() +{ + if (chunks[0].empty() || chunks[1].empty()) + return Status({}, true); + if (last_used_row[0] >= chunks[0].getNumRows()) + return Status(0); + if (last_used_row[1] >= chunks[1].getNumRows()) + return Status(1); + /// We have unused rows from both inputs + size_t result_num_rows = std::min(chunks[0].getNumRows() - last_used_row[0], chunks[1].getNumRows() - last_used_row[1]); + + Chunk result; + for (size_t source_num = 0; source_num < 2; ++source_num) + for (const auto & col : chunks[source_num].getColumns()) + result.addColumn(col->cut(last_used_row[source_num], result_num_rows)); + last_used_row[0] += result_num_rows; + 
last_used_row[1] += result_num_rows; + return Status(std::move(result)); +} + +PasteJoinTransform::PasteJoinTransform( + JoinPtr table_join, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_) + : IMergingTransform( + input_headers, + output_header, + /* have_all_inputs_= */ true, + limit_hint_, + /* always_read_till_end_= */ false, + /* empty_chunk_on_finish_= */ true, + table_join, input_headers, max_block_size) + , log(&Poco::Logger::get("PasteJoinTransform")) +{ + LOG_TRACE(log, "Use PasteJoinTransform"); +} + +void PasteJoinTransform::onFinish() {}; + +} diff --git a/src/Processors/Transforms/PasteJoinTransform.h b/src/Processors/Transforms/PasteJoinTransform.h new file mode 100644 index 00000000000..7ecf70e18dc --- /dev/null +++ b/src/Processors/Transforms/PasteJoinTransform.h @@ -0,0 +1,88 @@ +#pragma once +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace Poco { class Logger; } + +namespace DB +{ + +class IJoin; +using JoinPtr = std::shared_ptr; + +/* + * This class is used to join chunks from two sorted streams. + * It is used in MergeJoinTransform. + */ +class PasteJoinAlgorithm final : public IMergingAlgorithm +{ +public: + explicit PasteJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_); + + const char * getName() const override { return "PasteJoinAlgorithm"; } + virtual void initialize(Inputs inputs) override; + virtual void consume(Input & input, size_t source_num) override; + virtual Status merge() override; + + void logElapsed(double seconds); + +private: + Chunk createBlockWithDefaults(size_t source_num); + Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; + + /// For `USING` join key columns should have values from right side instead of defaults + std::unordered_map left_to_right_key_remap; + + std::array chunks; + + JoinPtr table_join; + + size_t max_block_size; + + struct Statistic + { + size_t num_blocks[2] = {0, 0}; + size_t num_rows[2] = {0, 0}; + + size_t max_blocks_loaded = 0; + }; + + Statistic stat; + + Poco::Logger * log; + UInt64 last_used_row[2] = {0, 0}; +}; + +class PasteJoinTransform final : public IMergingTransform +{ + using Base = IMergingTransform; + +public: + PasteJoinTransform( + JoinPtr table_join, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint = 0); + + String getName() const override { return "PasteJoinTransform"; } + +protected: + void onFinish() override; + + Poco::Logger * log; +}; + +} diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp new file mode 100644 index 00000000000..6e3cdc0fda1 --- /dev/null +++ b/src/Processors/Transforms/ScatterByPartitionTransform.cpp @@ -0,0 +1,129 @@ +#include + +#include +#include + +namespace DB +{ +ScatterByPartitionTransform::ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_) + : IProcessor(InputPorts{header}, OutputPorts{output_size_, header}) + , output_size(output_size_) + , key_columns(std::move(key_columns_)) + , hash(0) +{} + +IProcessor::Status ScatterByPartitionTransform::prepare() +{ + auto & input = getInputs().front(); + + /// Check all outputs are finished or ready to get data. 
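The PasteJoinAlgorithm above implements PASTE JOIN: the two inputs are zipped positionally, no keys are compared, and each produced block contains min(unused rows on the left, unused rows on the right) rows, with leftovers kept for the next call. A small standalone sketch of that zipping rule using simplified containers (not the actual Chunk/Status machinery):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

struct PastedBlock { std::vector<int64_t> left; std::vector<std::string> right; };

// Paste two "blocks" side by side: take as many rows as both sides still
// have, and leave any remainder to be consumed when more input arrives.
PastedBlock pasteOnce(std::vector<int64_t> & left, std::vector<std::string> & right)
{
    const size_t rows = std::min(left.size(), right.size());
    PastedBlock result;
    result.left.assign(left.begin(), left.begin() + rows);
    result.right.assign(right.begin(), right.begin() + rows);
    left.erase(left.begin(), left.begin() + rows);
    right.erase(right.begin(), right.begin() + rows);
    return result;
}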
+ + bool all_finished = true; + for (auto & output : outputs) + { + if (output.isFinished()) + continue; + + all_finished = false; + } + + if (all_finished) + { + input.close(); + return Status::Finished; + } + + if (!all_outputs_processed) + { + auto output_it = outputs.begin(); + bool can_push = false; + for (size_t i = 0; i < output_size; ++i, ++output_it) + if (!was_output_processed[i] && output_it->canPush()) + can_push = true; + if (!can_push) + return Status::PortFull; + return Status::Ready; + } + /// Try get chunk from input. + + if (input.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + chunk = input.pull(); + has_data = true; + was_output_processed.assign(outputs.size(), false); + + return Status::Ready; +} + +void ScatterByPartitionTransform::work() +{ + if (all_outputs_processed) + generateOutputChunks(); + all_outputs_processed = true; + + size_t chunk_number = 0; + for (auto & output : outputs) + { + auto & was_processed = was_output_processed[chunk_number]; + auto & output_chunk = output_chunks[chunk_number]; + ++chunk_number; + + if (was_processed) + continue; + + if (output.isFinished()) + continue; + + if (!output.canPush()) + { + all_outputs_processed = false; + continue; + } + + output.push(std::move(output_chunk)); + was_processed = true; + } + + if (all_outputs_processed) + { + has_data = false; + output_chunks.clear(); + } +} + +void ScatterByPartitionTransform::generateOutputChunks() +{ + auto num_rows = chunk.getNumRows(); + const auto & columns = chunk.getColumns(); + + hash.reset(num_rows); + + for (const auto & column_number : key_columns) + columns[column_number]->updateWeakHash32(hash); + + const auto & hash_data = hash.getData(); + IColumn::Selector selector(num_rows); + + for (size_t row = 0; row < num_rows; ++row) + selector[row] = hash_data[row] % output_size; + + output_chunks.resize(output_size); + for (const auto & column : columns) + { + auto filtered_columns = column->scatter(output_size, selector); + for (size_t i = 0; i < output_size; ++i) + output_chunks[i].addColumn(std::move(filtered_columns[i])); + } +} + +} diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.h b/src/Processors/Transforms/ScatterByPartitionTransform.h new file mode 100644 index 00000000000..327f6dd62b4 --- /dev/null +++ b/src/Processors/Transforms/ScatterByPartitionTransform.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +struct ScatterByPartitionTransform : IProcessor +{ + ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_); + + String getName() const override { return "ScatterByPartitionTransform"; } + + Status prepare() override; + void work() override; + +private: + + void generateOutputChunks(); + + size_t output_size; + ColumnNumbers key_columns; + + bool has_data = false; + bool all_outputs_processed = true; + std::vector was_output_processed; + Chunk chunk; + + WeakHash32 hash; + Chunks output_chunks; +}; + +} diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 7afc7a38aab..47b5b900400 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -67,7 +67,7 @@ public: // Must insert the result for current_row. 
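The ScatterByPartitionTransform introduced above routes each row to one of N outputs by hashing the partition key columns, so rows with equal keys always take the same output and per-stream sorting stays correct. A minimal sketch of the selector computation, with std::hash standing in for the WeakHash32 used in the real transform:

#include <cstddef>
#include <functional>
#include <string>
#include <vector>

// Build a selector: selector[row] is the index of the output that row goes to.
std::vector<size_t> buildSelector(const std::vector<std::string> & key_values, size_t output_size)
{
    std::vector<size_t> selector(key_values.size());
    for (size_t row = 0; row < key_values.size(); ++row)
        selector[row] = std::hash<std::string>{}(key_values[row]) % output_size;
    return selector;
}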
virtual void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) = 0; + size_t function_index) const = 0; virtual std::optional getDefaultFrame() const { return {}; } }; @@ -1463,7 +1463,7 @@ struct WindowFunctionRank final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { IColumn & to = *transform->blockAt(transform->current_row) .output_columns[function_index]; @@ -1482,7 +1482,7 @@ struct WindowFunctionDenseRank final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { IColumn & to = *transform->blockAt(transform->current_row) .output_columns[function_index]; @@ -1561,7 +1561,7 @@ struct StatefulWindowFunction : public WindowFunction bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v; } - State & getState(const WindowFunctionWorkspace & workspace) + State & getState(const WindowFunctionWorkspace & workspace) const { return *static_cast(static_cast(workspace.aggregate_function_state.data())); } @@ -1585,17 +1585,21 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc static constexpr size_t ARGUMENT_VALUE = 0; static constexpr size_t ARGUMENT_TIME = 1; - WindowFunctionExponentialTimeDecayedSum(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedSum(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 2) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1622,7 +1626,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & workspace = transform->workspaces[function_index]; auto & state = getState(workspace); @@ -1670,7 +1674,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction @@ -1678,17 +1682,21 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction static constexpr size_t ARGUMENT_VALUE = 0; static constexpr size_t ARGUMENT_TIME = 1; - WindowFunctionExponentialTimeDecayedMax(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & 
parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedMax(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 2) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1715,7 +1723,7 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { Float64 result = std::numeric_limits::quiet_NaN(); @@ -1742,24 +1750,28 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFunction { static constexpr size_t ARGUMENT_TIME = 0; - WindowFunctionExponentialTimeDecayedCount(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedCount(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1778,7 +1790,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & workspace = transform->workspaces[function_index]; auto & state = getState(workspace); @@ -1823,7 +1835,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunction @@ -1831,17 +1843,21 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc static constexpr size_t ARGUMENT_VALUE = 0; static constexpr size_t ARGUMENT_TIME = 1; - WindowFunctionExponentialTimeDecayedAvg(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - 
decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedAvg(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 2) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1868,7 +1884,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & workspace = transform->workspaces[function_index]; auto & state = getState(workspace); @@ -1933,7 +1949,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionRowNumber final : public WindowFunction @@ -1946,7 +1962,7 @@ struct WindowFunctionRowNumber final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { IColumn & to = *transform->blockAt(transform->current_row) .output_columns[function_index]; @@ -1955,12 +1971,30 @@ struct WindowFunctionRowNumber final : public WindowFunction } }; +namespace +{ + struct NtileState + { + UInt64 buckets = 0; + RowNumber start_row; + UInt64 current_partition_rows = 0; + UInt64 current_partition_inserted_row = 0; + + void windowInsertResultInto( + const WindowTransform * transform, + size_t function_index, + const DataTypes & argument_types); + + static void checkWindowFrameType(const WindowTransform * transform); + }; +} + // Usage: ntile(n). n is the number of buckets. 
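The exponentialTimeDecayed* rewrites above all follow one pattern: parameter validation moves into a static helper so decay_length can become a const member initialized in the member-initializer list instead of being assigned in the constructor body. A minimal illustration of that pattern with hypothetical names:

#include <stdexcept>
#include <vector>

struct DecayedFunctionExample
{
    // Validate-and-extract helper usable in the initializer list.
    static double getDecayLength(const std::vector<double> & parameters)
    {
        if (parameters.size() != 1)
            throw std::invalid_argument("function takes exactly one parameter");
        return parameters[0];
    }

    explicit DecayedFunctionExample(const std::vector<double> & parameters)
        : decay_length(getDecayLength(parameters)) // const member, set exactly once
    {
    }

    const double decay_length;
};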
-struct WindowFunctionNtile final : public WindowFunction +struct WindowFunctionNtile final : public StatefulWindowFunction { WindowFunctionNtile(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (argument_types.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one argument", name_); @@ -1981,7 +2015,20 @@ struct WindowFunctionNtile final : public WindowFunction } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override + { + const auto & workspace = transform->workspaces[function_index]; + auto & state = getState(workspace); + state.windowInsertResultInto(transform, function_index, argument_types); + } +}; + +namespace +{ + void NtileState::windowInsertResultInto( + const WindowTransform * transform, + size_t function_index, + const DataTypes & argument_types) { if (!buckets) [[unlikely]] { @@ -2072,13 +2119,8 @@ struct WindowFunctionNtile final : public WindowFunction bucket_num += 1; } } -private: - UInt64 buckets = 0; - RowNumber start_row; - UInt64 current_partition_rows = 0; - UInt64 current_partition_inserted_row = 0; - static void checkWindowFrameType(const WindowTransform * transform) + void NtileState::checkWindowFrameType(const WindowTransform * transform) { if (transform->order_by_indices.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); @@ -2093,7 +2135,7 @@ private: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); } } -}; +} // ClickHouse-specific variant of lag/lead that respects the window frame. 
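WindowFunctionNtile above is converted to the stateful pattern: windowInsertResultInto becomes const and all per-partition bookkeeping (buckets, start_row, row counters) moves into an NtileState stored in the function's workspace, so one function object can serve many concurrent workspaces. The sketch below shows the separation with illustrative names (not the WindowTransform API); the bucket formula is a simplified even-spread rule.

#include <cstdint>

// Mutable per-partition bookkeeping lives outside the function object.
struct BucketState
{
    uint64_t buckets = 0;
    uint64_t rows_inserted = 0;
};

struct NtileLikeFunction
{
    // const: the function itself carries no mutable execution state.
    uint64_t nextBucket(BucketState & state, uint64_t partition_rows) const
    {
        ++state.rows_inserted;
        // Spread partition_rows rows evenly over state.buckets buckets,
        // giving earlier buckets the extra rows (simplified ntile rule).
        return 1 + (state.rows_inserted - 1) * state.buckets / partition_rows;
    }
};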
template @@ -2165,7 +2207,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & current_block = transform->blockAt(transform->current_row); IColumn & to = *current_block.output_columns[function_index]; @@ -2255,7 +2297,7 @@ struct WindowFunctionNthValue final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & current_block = transform->blockAt(transform->current_row); IColumn & to = *current_block.output_columns[function_index]; @@ -2298,16 +2340,18 @@ struct NonNegativeDerivativeState Float64 previous_timestamp = 0; }; -// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND]) -struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction +struct NonNegativeDerivativeParams { static constexpr size_t ARGUMENT_METRIC = 0; static constexpr size_t ARGUMENT_TIMESTAMP = 1; static constexpr size_t ARGUMENT_INTERVAL = 2; - WindowFunctionNonNegativeDerivative(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + Float64 interval_length = 1; + bool interval_specified = false; + Int64 ts_scale_multiplier = 0; + + NonNegativeDerivativeParams( + const std::string & name_, const DataTypes & argument_types, const Array & parameters) { if (!parameters.empty()) { @@ -2365,11 +2409,23 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction interval_specified = true; } } +}; + +// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND]) +struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction, public NonNegativeDerivativeParams +{ + using Params = NonNegativeDerivativeParams; + + WindowFunctionNonNegativeDerivative(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , NonNegativeDerivativeParams(name, argument_types, parameters) + {} bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; @@ -2405,10 +2461,6 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction WindowFunctionHelpers::setValueToOutputColumn(transform, function_index, result >= 0 ? 
result : 0); } -private: - Float64 interval_length = 1; - bool interval_specified = false; - Int64 ts_scale_multiplier = 0; }; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 21eb07a5acd..a0fabe3273c 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } void QueryPipelineBuilder::checkInitialized() @@ -354,7 +356,9 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped left->pipe.dropExtremes(); right->pipe.dropExtremes(); - if (left->getNumStreams() != 1 || right->getNumStreams() != 1) + if ((left->getNumStreams() != 1 || right->getNumStreams() != 1) && join->getTableJoin().kind() == JoinKind::Paste) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Paste JOIN requires sorted tables only"); + else if (left->getNumStreams() != 1 || right->getNumStreams() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Join is supported only for pipelines with one output port"); if (left->hasTotals() || right->hasTotals()) @@ -362,9 +366,16 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped Blocks inputs = {left->getHeader(), right->getHeader()}; - auto joining = std::make_shared(join, inputs, out_header, max_block_size); - - return mergePipelines(std::move(left), std::move(right), std::move(joining), collected_processors); + if (join->getTableJoin().kind() == JoinKind::Paste) + { + auto joining = std::make_shared(join, inputs, out_header, max_block_size); + return mergePipelines(std::move(left), std::move(right), std::move(joining), collected_processors); + } + else + { + auto joining = std::make_shared(join, inputs, out_header, max_block_size); + return mergePipelines(std::move(left), std::move(right), std::move(joining), collected_processors); + } } std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLeft( diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index ac722656899..1a12c09a8c7 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -136,7 +136,7 @@ void WriteBufferFromHTTPServerResponse::nextImpl() WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - size_t keep_alive_timeout_, + UInt64 keep_alive_timeout_, bool compress_, CompressionMethod compression_method_) : BufferWithOwnMemory(DBMS_DEFAULT_BUFFER_SIZE) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 94202e1e0e8..38345f27952 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -36,7 +36,7 @@ public: WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - size_t keep_alive_timeout_, + UInt64 keep_alive_timeout_, bool compress_ = false, /// If true - set Content-Encoding header and compress the result. 
CompressionMethod compression_method_ = CompressionMethod::None); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index ffa8c11fa44..f53c0094ef7 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -616,12 +616,10 @@ void HTTPHandler::processQuery( size_t buffer_size_http = DBMS_DEFAULT_BUFFER_SIZE; size_t buffer_size_memory = (buffer_size_total > buffer_size_http) ? buffer_size_total : 0; - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); - used_output.out = std::make_shared( response, request.getMethod() == HTTPRequest::HTTP_HEAD, - keep_alive_timeout, + context->getServerSettings().keep_alive_timeout.totalSeconds(), client_supports_http_compression, http_response_compression_method); @@ -731,8 +729,8 @@ void HTTPHandler::processQuery( /// to some other value. const auto & settings = context->getSettingsRef(); - /// Only readonly queries are allowed for HTTP GET requests. - if (request.getMethod() == HTTPServerRequest::HTTP_GET) + /// Anything else beside HTTP POST should be readonly queries. + if (request.getMethod() != HTTPServerRequest::HTTP_POST) { if (settings.readonly == 0) context->setSetting("readonly", 2); diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index e1ee9586f83..66b55f68217 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -7,7 +7,6 @@ #include #include "HTTPHandler.h" -#include "NotFoundHandler.h" #include "StaticRequestHandler.h" #include "ReplicasStatusHandler.h" #include "InterserverIOHTTPHandler.h" @@ -161,6 +160,12 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS factory.addPathToHints("/dashboard"); factory.addHandler(dashboard_handler); + auto binary_handler = std::make_shared>(server); + binary_handler->attachNonStrictPath("/binary"); + binary_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/binary"); + factory.addHandler(binary_handler); + auto js_handler = std::make_shared>(server); js_handler->attachNonStrictPath("/js/"); js_handler->allowGetAndHeadRequest(); diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 5f6da208778..53773a83b40 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -87,8 +87,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe response.setChunkedTransferEncoding(true); Output used_output; - const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); used_output.out = std::make_shared( response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 6d35386bfb5..127ed843cb6 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -17,6 +17,9 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { + /// Raw config reference is used here to avoid dependency on Context and ServerSettings. + /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 + /// And there ordinary Context is replaced with a tiny clone. 
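Two behaviour notes on the HTTP changes above: the keep-alive timeout is now taken from the server settings object instead of re-reading the raw config in each handler, and the read-only rule is widened from "GET only" to "anything other than POST". A small sketch of the latter rule, assuming readonly level 2 keeps its existing meaning of "read-only, but settings may still be adjusted" (illustrative function, not the HTTPHandler code):

#include <string>

// Any HTTP method other than POST is treated as a read-only query;
// an explicit stricter readonly level set by the user is left untouched.
int effectiveReadonlyLevel(const std::string & http_method, int current_readonly)
{
    if (http_method != "POST" && current_readonly == 0)
        return 2;
    return current_readonly;
}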
const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 2b7a52572a8..c30c3ebaa77 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -84,7 +84,8 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } } - setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT)); + const auto & server_settings = getContext()->getServerSettings(); + setResponseDefaultHeaders(response, server_settings.keep_alive_timeout.totalSeconds()); if (!ok) { diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index a7e85d161c1..34cb5d2d169 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes } static inline WriteBufferPtr -responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, unsigned int keep_alive_timeout) +responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, UInt64 keep_alive_timeout) { /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). String http_response_compression_methods = request.get("Accept-Encoding", ""); @@ -90,7 +90,7 @@ static inline void trySendExceptionToClient( void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); const auto & out = responseWriteBuffer(request, response, keep_alive_timeout); try diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f2d7a58119f..a563e0e0004 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -657,7 +657,7 @@ void TCPHandler::runImpl() catch (...) 
{ state.io.onException(); - exception = std::make_unique(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"); + exception = std::make_unique(Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception")); } try diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 12d2588723e..ad48b38b91a 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -1,10 +1,10 @@ #include "WebUIRequestHandler.h" #include "IServer.h" -#include #include #include +#include #include #ifdef __clang__ @@ -24,6 +24,7 @@ INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html"); INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); +INCBIN(resource_binary_html, SOURCE_DIR "/programs/server/binary.html"); namespace DB @@ -37,7 +38,7 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_) void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); response.setContentType("text/html; charset=UTF-8"); @@ -67,6 +68,11 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR *response.send() << html; } + else if (request.getURI().starts_with("/binary")) + { + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); + *response.send() << std::string_view(reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize); + } else if (request.getURI() == "/js/uplot.js") { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 428a4e5c24b..28f9ec6677a 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -692,6 +692,15 @@ public: /// when considering in-memory blocks. virtual std::optional totalBytes(const Settings &) const { return {}; } + /// If it is possible to quickly determine exact number of uncompressed bytes for the table on storage: + /// - disk (uncompressed) + /// + /// Used for: + /// - For total_bytes_uncompressed column in system.tables + /// + /// Does not take underlying Storage (if any) into account + virtual std::optional totalBytesUncompressed(const Settings &) const { return {}; } + /// Number of rows INSERTed since server start. /// /// Does not take the underlying Storage (if any) into account. 
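The totalBytesUncompressed hook above is backed, for MergeTree parts, by summing the uncompressed sizes already recorded in each part's checksums file (see getTotalSizeUncompressedOnDisk later in this diff). A standalone sketch of that aggregation with simplified stand-in types:

#include <cstdint>
#include <map>
#include <string>

struct FileChecksumInfo
{
    uint64_t file_size = 0;          // size on disk (compressed for compressed files)
    uint64_t uncompressed_size = 0;  // size of the data before compression
};

uint64_t totalUncompressedBytes(const std::map<std::string, FileChecksumInfo> & files)
{
    uint64_t result = 0;
    for (const auto & [name, checksum] : files)
        result += checksum.uncompressed_size;
    return result;
}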
diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 2bf2f9fddc7..c54947dde8e 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -522,11 +522,10 @@ std::pair Fetcher::fetchSelected uri, Poco::Net::HTTPRequest::HTTP_POST, nullptr, - timeouts, creds, DBMS_DEFAULT_BUFFER_SIZE, 0, /* no redirects */ - static_cast(data_settings->replicated_max_parallel_fetches_for_host)); + context->getCommonFetchesSessionFactory()); int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 38ecd8f4067..87f23b0da2a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1142,6 +1142,7 @@ void IMergeTreeDataPart::loadChecksums(bool require) { assertEOF(*buf); bytes_on_disk = checksums.getTotalSizeOnDisk(); + bytes_uncompressed_on_disk = checksums.getTotalSizeUncompressedOnDisk(); } else bytes_on_disk = getDataPartStorage().calculateTotalSizeOnDisk(); @@ -1159,6 +1160,7 @@ void IMergeTreeDataPart::loadChecksums(bool require) writeChecksums(checksums, {}); bytes_on_disk = checksums.getTotalSizeOnDisk(); + bytes_uncompressed_on_disk = checksums.getTotalSizeUncompressedOnDisk(); } } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 06e0712646a..640a1f1d0a3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -370,7 +370,9 @@ public: UInt64 getIndexSizeFromFile() const; UInt64 getBytesOnDisk() const { return bytes_on_disk; } + UInt64 getBytesUncompressedOnDisk() const { return bytes_uncompressed_on_disk; } void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } + void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; } size_t getFileSizeOrZero(const String & file_name) const; auto getFilesChecksums() const { return checksums.files; } @@ -566,6 +568,7 @@ protected: /// Total size on disk, not only columns. May not contain size of /// checksums.txt and columns.txt. 0 - if not counted; UInt64 bytes_on_disk{0}; + UInt64 bytes_uncompressed_on_disk{0}; /// Columns description. Cannot be changed, after part initialization. 
NamesAndTypesList columns; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 4609df203b9..8b5e9ba96ee 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -570,6 +570,8 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const global_ctx->storage_snapshot, global_ctx->future_part->parts[part_num], column_names, + /*mark_ranges=*/ {}, + /*apply_deleted_mask=*/ true, ctx->read_with_direct_io, /*take_column_types_from_storage=*/ true, /*quiet=*/ false, @@ -922,6 +924,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() global_ctx->storage_snapshot, part, global_ctx->merging_column_names, + /*mark_ranges=*/ {}, + /*apply_deleted_mask=*/ true, ctx->read_with_direct_io, /*take_column_types_from_storage=*/ true, /*quiet=*/ false, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d97f337c9c9..914affcc8f9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2325,6 +2325,7 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa part_log_elem.partition_id = part->info.partition_id; part_log_elem.part_name = part->name; part_log_elem.bytes_compressed_on_disk = part->getBytesOnDisk(); + part_log_elem.bytes_uncompressed = part->getBytesUncompressedOnDisk(); part_log_elem.rows = part->rows_count; part_log_elem.part_type = part->getType(); @@ -2802,8 +2803,6 @@ void MergeTreeData::dropAllData() void MergeTreeData::dropIfEmpty() { - LOG_TRACE(log, "dropIfEmpty"); - auto lock = lockParts(); if (!data_parts_by_info.empty()) @@ -4835,10 +4834,18 @@ void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, Context partition_size += part->getBytesOnDisk(); auto table_id = getStorageID(); + + const auto & query_settings = local_context->getSettingsRef(); + if (query_settings.max_partition_size_to_drop.changed) + { + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size, query_settings.max_partition_size_to_drop); + return; + } + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size); } -void MergeTreeData::checkPartCanBeDropped(const String & part_name) +void MergeTreeData::checkPartCanBeDropped(const String & part_name, ContextPtr local_context) { if (!supportsReplication() && isStaticStorage()) return; @@ -4848,6 +4855,14 @@ void MergeTreeData::checkPartCanBeDropped(const String & part_name) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No part {} in committed state", part_name); auto table_id = getStorageID(); + + const auto & query_settings = local_context->getSettingsRef(); + if (query_settings.max_partition_size_to_drop.changed) + { + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, part->getBytesOnDisk(), query_settings.max_partition_size_to_drop); + return; + } + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, part->getBytesOnDisk()); } @@ -5035,7 +5050,7 @@ Pipe MergeTreeData::alterPartition( if (command.part) { auto part_name = command.partition->as().value.safeGet(); - checkPartCanBeDropped(part_name); + checkPartCanBeDropped(part_name, query_context); dropPart(part_name, command.detach, query_context); } else @@ -7493,6 +7508,7 @@ try part_log_elem.disk_name = result_part->getDataPartStorage().getDiskName(); part_log_elem.path_on_disk = 
result_part->getDataPartStorage().getFullPath(); part_log_elem.bytes_compressed_on_disk = result_part->getBytesOnDisk(); + part_log_elem.bytes_uncompressed = result_part->getBytesUncompressedOnDisk(); part_log_elem.rows = result_part->rows_count; part_log_elem.part_type = result_part->getType(); } @@ -7507,7 +7523,6 @@ try part_log_elem.bytes_read_uncompressed = (*merge_entry)->bytes_read_uncompressed; part_log_elem.rows = (*merge_entry)->rows_written; - part_log_elem.bytes_uncompressed = (*merge_entry)->bytes_written_uncompressed; part_log_elem.peak_memory_usage = (*merge_entry)->getMemoryTracker().getPeak(); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index fc1d9085527..c69c7aaba3d 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -795,7 +795,7 @@ public: /// We do not use mutex because it is not very important that the size could change during the operation. void checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context); - void checkPartCanBeDropped(const String & part_name); + void checkPartCanBeDropped(const String & part_name, ContextPtr local_context); Pipe alterPartition( const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 4bda5ce469d..aa1968794f9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -112,6 +112,14 @@ UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const return res; } +UInt64 MergeTreeDataPartChecksums::getTotalSizeUncompressedOnDisk() const +{ + UInt64 res = 0; + for (const auto & [_, checksum] : files) + res += checksum.uncompressed_size; + return res; +} + bool MergeTreeDataPartChecksums::read(ReadBuffer & in, size_t format_version) { switch (format_version) diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 8e5e8c8c448..837b940e354 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -88,6 +88,7 @@ struct MergeTreeDataPartChecksums static MergeTreeDataPartChecksums deserializeFrom(const String & s); UInt64 getTotalSizeOnDisk() const; + UInt64 getTotalSizeUncompressedOnDisk() const; }; /// A kind of MergeTreeDataPartChecksums intended to be stored in ZooKeeper (to save its RAM) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8485f3f3e1d..fd7ae849388 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -92,7 +92,7 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( for (const auto & part : parts) { - MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log); + MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, {}, settings, log); /** In order to get a lower bound on the number of rows that match the condition on PK, * consider only guaranteed full marks. 
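The checkPartitionCanBeDropped()/checkPartCanBeDropped() changes above (and the analogous checkTableCanBeDropped() changes further down in this patch) follow one precedence rule: if the query explicitly set max_partition_size_to_drop / max_table_size_to_drop, validate against that value, otherwise fall back to the server-wide limit. A hedged sketch of that precedence, with std::optional standing in for the setting's `changed` flag and a limit of 0 assumed to mean "no limit"; the real check lives in the Context methods, not in a free function like this.

```cpp
#include <cstdint>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

// Illustrative only: query_level_limit is present iff the query changed the setting.
void checkPartitionCanBeDropped(
    uint64_t partition_size,
    std::optional<uint64_t> query_level_limit,
    uint64_t server_level_limit)
{
    // Query-level value, when set, takes precedence over the server-wide default.
    const uint64_t effective_limit = query_level_limit.value_or(server_level_limit);
    if (effective_limit != 0 && partition_size > effective_limit)
        throw std::runtime_error(
            "Partition size " + std::to_string(partition_size)
            + " bytes exceeds max_partition_size_to_drop (" + std::to_string(effective_limit) + ")");
}

int main()
{
    checkPartitionCanBeDropped(1'000, std::nullopt, 50'000); // server-wide limit applies
    checkPartitionCanBeDropped(40'000, 100'000, 50'000);     // query-level override wins
    std::cout << "both drops allowed\n";
    return 0;
}
```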
@@ -770,6 +770,35 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( return sampling; } +void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( + std::optional & part_offset_condition, const ActionsDAGPtr & filter_dag, ContextPtr context) +{ + if (!filter_dag) + return; + + auto part_offset_type = std::make_shared(); + auto part_type = std::make_shared(std::make_shared()); + Block sample + = {ColumnWithTypeAndName(part_offset_type->createColumn(), part_offset_type, "_part_offset"), + ColumnWithTypeAndName(part_type->createColumn(), part_type, "_part")}; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), sample); + if (!dag) + return; + + /// The _part filter should only be effective in conjunction with the _part_offset filter. + auto required_columns = dag->getRequiredColumnsNames(); + if (std::find(required_columns.begin(), required_columns.end(), "_part_offset") == required_columns.end()) + return; + + part_offset_condition.emplace(KeyCondition{ + dag, + context, + sample.getNames(), + std::make_shared(std::make_shared(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), + {}}); +} + std::optional> MergeTreeDataSelectExecutor::filterPartsByVirtualColumns( const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, @@ -909,6 +938,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd StorageMetadataPtr metadata_snapshot, const ContextPtr & context, const KeyCondition & key_condition, + const std::optional & part_offset_condition, const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, Poco::Logger * log, @@ -928,7 +958,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Strings forced_indices; { - Tokens tokens(indices.data(), &indices[indices.size()], settings.max_query_size); + Tokens tokens(indices.data(), indices.data() + indices.size(), settings.max_query_size); IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); Expected expected; if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) @@ -983,8 +1013,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd RangesInDataPart ranges(part, alter_conversions_for_part, part_index); size_t total_marks_count = part->index_granularity.getMarksCountWithoutFinal(); - if (metadata_snapshot->hasPrimaryKey()) - ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log); + if (metadata_snapshot->hasPrimaryKey() || part_offset_condition) + ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, part_offset_condition, settings, log); else if (total_marks_count) ranges.ranges = MarkRanges{{MarkRange{0, total_marks_count}}}; @@ -1404,6 +1434,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( const MergeTreeData::DataPartPtr & part, const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, + const std::optional & part_offset_condition, const Settings & settings, Poco::Logger * log) { @@ -1417,7 +1448,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( bool has_final_mark = part->index_granularity.hasFinalMark(); /// If index is not used. 
- if (key_condition.alwaysUnknownOrTrue()) + if (key_condition.alwaysUnknownOrTrue() && (!part_offset_condition || part_offset_condition->alwaysUnknownOrTrue())) { if (has_final_mark) res.push_back(MarkRange(0, marks_count - 1)); @@ -1467,32 +1498,69 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( std::vector index_left(used_key_size); std::vector index_right(used_key_size); + /// For _part_offset and _part virtual columns + DataTypes part_offset_types + = {std::make_shared(), std::make_shared(std::make_shared())}; + std::vector part_offset_left(2); + std::vector part_offset_right(2); + auto may_be_true_in_range = [&](MarkRange & range) { - if (range.end == marks_count && !has_final_mark) + bool key_condition_maybe_true = true; + if (!key_condition.alwaysUnknownOrTrue()) { - for (size_t i = 0; i < used_key_size; ++i) + if (range.end == marks_count && !has_final_mark) { - create_field_ref(range.begin, i, index_left[i]); - index_right[i] = POSITIVE_INFINITY; + for (size_t i = 0; i < used_key_size; ++i) + { + create_field_ref(range.begin, i, index_left[i]); + index_right[i] = POSITIVE_INFINITY; + } } - } - else - { - if (has_final_mark && range.end == marks_count) - range.end -= 1; /// Remove final empty mark. It's useful only for primary key condition. + else + { + if (has_final_mark && range.end == marks_count) + range.end -= 1; /// Remove final empty mark. It's useful only for primary key condition. - for (size_t i = 0; i < used_key_size; ++i) + for (size_t i = 0; i < used_key_size; ++i) + { + create_field_ref(range.begin, i, index_left[i]); + create_field_ref(range.end, i, index_right[i]); + } + } + key_condition_maybe_true = key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types); + } + + bool part_offset_condition_maybe_true = true; + + if (part_offset_condition && !part_offset_condition->alwaysUnknownOrTrue()) + { + auto begin = part->index_granularity.getMarkStartingRow(range.begin); + auto end = part->index_granularity.getMarkStartingRow(range.end) - 1; + if (begin > end) { - create_field_ref(range.begin, i, index_left[i]); - create_field_ref(range.end, i, index_right[i]); + /// Empty mark (final mark) + part_offset_condition_maybe_true = false; + } + else + { + part_offset_left[0] = part->index_granularity.getMarkStartingRow(range.begin); + part_offset_right[0] = part->index_granularity.getMarkStartingRow(range.end) - 1; + part_offset_left[1] = part->name; + part_offset_right[1] = part->name; + + part_offset_condition_maybe_true + = part_offset_condition->mayBeTrueInRange(2, part_offset_left.data(), part_offset_right.data(), part_offset_types); } } - return key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types); + return key_condition_maybe_true && part_offset_condition_maybe_true; }; + bool key_condition_exact_range = key_condition.alwaysUnknownOrTrue() || key_condition.matchesExactContinuousRange(); + bool part_offset_condition_exact_range + = !part_offset_condition || part_offset_condition->alwaysUnknownOrTrue() || part_offset_condition->matchesExactContinuousRange(); const String & part_name = part->isProjectionPart() ? 
fmt::format("{}.{}", part->name, part->getParentPart()->name) : part->name; - if (!key_condition.matchesExactContinuousRange()) + if (!key_condition_exact_range || !part_offset_condition_exact_range) { // Do exclusion search, where we drop ranges that do not match diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 01c2da9dd63..11c8e172a4f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -71,6 +71,7 @@ public: const MergeTreeData::DataPartPtr & part, const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, + const std::optional & part_offset_condition, const Settings & settings, Poco::Logger * log); @@ -161,6 +162,10 @@ public: size_t bytes_granularity, size_t max_marks); + /// If possible, construct optional key condition from predicates containing _part_offset column. + static void buildKeyConditionFromPartOffset( + std::optional & part_offset_condition, const ActionsDAGPtr & filter_dag, ContextPtr context); + /// If possible, filter using expression on virtual columns. /// Example: SELECT count() FROM table WHERE _part = 'part_name' /// If expression found, return a set with allowed part names (std::nullopt otherwise). @@ -199,6 +204,7 @@ public: StorageMetadataPtr metadata_snapshot, const ContextPtr & context, const KeyCondition & key_condition, + const std::optional & part_offset_condition, const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, Poco::Logger * log, diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index a889e58bfec..dc8ed368011 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -212,7 +212,7 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t { auto rc = index->add(static_cast(index->size()), &column_array_data_float_data[column_array_offsets[current_row - 1]]); if (!rc) - throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, rc.error.release()); ProfileEvents::increment(ProfileEvents::USearchAddCount); ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, rc.visited_members); @@ -243,7 +243,7 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t { auto rc = index->add(static_cast(index->size()), item.data()); if (!rc) - throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, rc.error.release()); ProfileEvents::increment(ProfileEvents::USearchAddCount); ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, rc.visited_members); diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index a586997360a..69cf3cb4266 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -131,6 +131,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() .withSystemColumns(); + if (storage.supportsSubcolumns()) options.withSubcolumns(); columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); @@ -241,19 +242,24 @@ Pipe createMergeTreeSequentialSource( const StorageSnapshotPtr & 
storage_snapshot, MergeTreeData::DataPartPtr data_part, Names columns_to_read, + std::optional mark_ranges, + bool apply_deleted_mask, bool read_with_direct_io, bool take_column_types_from_storage, bool quiet, std::shared_ptr> filtered_rows_count) { + const auto & filter_column = LightweightDeleteDescription::FILTER_COLUMN; + /// The part might have some rows masked by lightweight deletes - const bool need_to_filter_deleted_rows = data_part->hasLightweightDelete(); - auto columns = columns_to_read; - if (need_to_filter_deleted_rows) - columns.emplace_back(LightweightDeleteDescription::FILTER_COLUMN.name); + const bool need_to_filter_deleted_rows = apply_deleted_mask && data_part->hasLightweightDelete(); + const bool has_filter_column = std::ranges::find(columns_to_read, filter_column.name) != columns_to_read.end(); + + if (need_to_filter_deleted_rows && !has_filter_column) + columns_to_read.emplace_back(filter_column.name); auto column_part_source = std::make_shared( - storage, storage_snapshot, data_part, columns, std::optional{}, + storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), /*apply_deleted_mask=*/ false, read_with_direct_io, take_column_types_from_storage, quiet); Pipe pipe(std::move(column_part_source)); @@ -261,10 +267,10 @@ Pipe createMergeTreeSequentialSource( /// Add filtering step that discards deleted rows if (need_to_filter_deleted_rows) { - pipe.addSimpleTransform([filtered_rows_count](const Block & header) + pipe.addSimpleTransform([filtered_rows_count, has_filter_column](const Block & header) { return std::make_shared( - header, nullptr, LightweightDeleteDescription::FILTER_COLUMN.name, true, false, filtered_rows_count); + header, nullptr, filter_column.name, !has_filter_column, false, filtered_rows_count); }); } @@ -316,7 +322,7 @@ public: if (!key_condition.alwaysFalse()) mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange( - data_part, metadata_snapshot, key_condition, context->getSettingsRef(), log); + data_part, metadata_snapshot, key_condition, {}, context->getSettingsRef(), log); if (mark_ranges && mark_ranges->empty()) { @@ -325,9 +331,17 @@ public: } } - auto source = std::make_unique( - storage, storage_snapshot, data_part, columns_to_read, - std::move(mark_ranges), apply_deleted_mask, false, true); + auto source = createMergeTreeSequentialSource( + storage, + storage_snapshot, + data_part, + columns_to_read, + std::move(mark_ranges), + apply_deleted_mask, + /*read_with_direct_io=*/ false, + /*take_column_types_from_storage=*/ true, + /*quiet=*/ false, + /*filtered_rows_count=*/ nullptr); pipeline.init(Pipe(std::move(source))); } @@ -343,7 +357,7 @@ private: Poco::Logger * log; }; -void createMergeTreeSequentialSource( +void createReadFromPartStep( QueryPlan & plan, const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index fb249568e8f..396d3f76886 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -15,6 +15,8 @@ Pipe createMergeTreeSequentialSource( const StorageSnapshotPtr & storage_snapshot, MergeTreeData::DataPartPtr data_part, Names columns_to_read, + std::optional mark_ranges, + bool apply_deleted_mask, bool read_with_direct_io, bool take_column_types_from_storage, bool quiet, @@ -22,7 +24,7 @@ Pipe createMergeTreeSequentialSource( class QueryPlan; -void createMergeTreeSequentialSource( +void 
createReadFromPartStep( QueryPlan & plan, const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 1906f130101..e0015cdeb40 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -212,4 +212,14 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const merge_selecting_sleep_slowdown_factor); } } + + +std::vector MergeTreeSettings::getAllRegisteredNames() const +{ + std::vector all_settings; + for (const auto & setting_field : all()) + all_settings.push_back(setting_field.getName()); + return all_settings; +} + } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index d69fd289ac0..106e66d8a99 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -115,10 +116,6 @@ struct Settings; M(UInt64, max_files_to_modify_in_alter_columns, 75, "Not apply ALTER if number of files for modification(deletion, addition) more than this.", 0) \ M(UInt64, max_files_to_remove_in_alter_columns, 50, "Not apply ALTER, if number of files for deletion more than this.", 0) \ M(Float, replicated_max_ratio_of_wrong_parts, 0.5, "If ratio of wrong parts to total number of parts is less than this - allow to start.", 0) \ - M(UInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT, "Limit parallel fetches from endpoint (actually pool size).", 0) \ - M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ - M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ - M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. 
Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ M(Bool, replicated_can_become_leader, true, "If true, Replicated tables replicas on this node will try to acquire leadership.", 0) \ M(Seconds, zookeeper_session_expiration_check_period, 60, "ZooKeeper session expiration check period, in seconds.", 0) \ M(Seconds, initialization_retry_period, 60, "Retry period for table initialization, in seconds.", 0) \ @@ -231,7 +228,10 @@ struct Settings; MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Bool, use_metadata_cache, false) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, merge_tree_enable_clear_old_broken_detached, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30) \ - MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, async_block_ids_cache_min_update_interval_ms, 1000) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_connection_timeout, 0) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_send_timeout, 0) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_receive_timeout, 0) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, String, clean_deleted_rows, "") \ /// Settings that should not change after the creation of a table. @@ -249,7 +249,7 @@ DECLARE_SETTINGS_TRAITS(MergeTreeSettingsTraits, LIST_OF_MERGE_TREE_SETTINGS) /** Settings for the MergeTree family of engines. * Could be loaded from config or from a CREATE TABLE query (SETTINGS clause). */ -struct MergeTreeSettings : public BaseSettings +struct MergeTreeSettings : public BaseSettings, public IHints<2> { void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); @@ -270,6 +270,8 @@ struct MergeTreeSettings : public BaseSettings /// Check that the values are sane taking also query-level settings into account. void sanityCheck(size_t background_pool_tasks) const; + + std::vector getAllRegisteredNames() const override; }; using MergeTreeSettingsPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 07a204d4325..8b34c221eec 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -184,6 +184,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->index = writer->releaseIndexColumns(); new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); + new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); new_part->index_granularity = writer->getIndexGranularity(); new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 827749aa094..8c896edab14 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -51,7 +51,6 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } - /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. * Other is just simple drop/renames, so they can be executed without interpreter. 
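MergeTreeSettings now inherits IHints and exposes getAllRegisteredNames(), presumably so that an unknown setting name in a SETTINGS clause can be answered with a "maybe you meant" suggestion, as other IHints users in the codebase do. The hint machinery needs little more than the full name list plus an edit-distance lookup; below is a simplified, self-contained sketch of that idea (the real IHints implementation and its distance function differ).

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Plain Levenshtein distance; enough to illustrate name hinting.
static size_t editDistance(const std::string & a, const std::string & b)
{
    std::vector<size_t> prev(b.size() + 1), cur(b.size() + 1);
    for (size_t j = 0; j <= b.size(); ++j)
        prev[j] = j;
    for (size_t i = 1; i <= a.size(); ++i)
    {
        cur[0] = i;
        for (size_t j = 1; j <= b.size(); ++j)
            cur[j] = std::min({prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (a[i - 1] != b[j - 1])});
        std::swap(prev, cur);
    }
    return prev[b.size()];
}

// Given the names returned by getAllRegisteredNames(), pick the closest match
// to an unknown setting, but only if it is close enough to be a plausible typo.
std::string suggestSetting(const std::vector<std::string> & registered, const std::string & unknown)
{
    std::string best;
    size_t best_distance = 3; // only suggest near misses
    for (const auto & name : registered)
    {
        size_t d = editDistance(unknown, name);
        if (d < best_distance)
        {
            best_distance = d;
            best = name;
        }
    }
    return best; // empty string if nothing is close
}

int main()
{
    std::vector<std::string> names{"index_granularity", "merge_with_ttl_timeout", "min_bytes_for_wide_part"};
    std::cout << suggestSetting(names, "index_granularety") << "\n"; // prints index_granularity
    return 0;
}
```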
@@ -79,7 +78,8 @@ static void splitAndModifyMutationCommands( || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE - || command.type == MutationCommand::Type::UPDATE) + || command.type == MutationCommand::Type::UPDATE + || command.type == MutationCommand::Type::APPLY_DELETED_MASK) { for_interpreter.push_back(command); for (const auto & [column_name, expr] : command.column_to_update_expression) @@ -202,7 +202,8 @@ static void splitAndModifyMutationCommands( || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE - || command.type == MutationCommand::Type::UPDATE) + || command.type == MutationCommand::Type::UPDATE + || command.type == MutationCommand::Type::APPLY_DELETED_MASK) { for_interpreter.push_back(command); } @@ -257,15 +258,12 @@ getColumnsForNewDataPart( NameToNameMap renamed_columns_from_to; ColumnsDescription part_columns(source_part->getColumns()); NamesAndTypesList system_columns; - if (source_part->supportLightweightDeleteMutate()) - system_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); - /// Preserve system columns that have persisted values in the source_part - for (const auto & column : system_columns) - { - if (part_columns.has(column.name) && !storage_columns.contains(column.name)) - storage_columns.emplace_back(column); - } + const auto & deleted_mask_column = LightweightDeleteDescription::FILTER_COLUMN; + bool supports_lightweight_deletes = source_part->supportLightweightDeleteMutate(); + + bool deleted_mask_updated = false; + bool has_delete_command = false; NameSet storage_columns_set; for (const auto & [name, _] : storage_columns) @@ -277,23 +275,22 @@ getColumnsForNewDataPart( { for (const auto & [column_name, _] : command.column_to_update_expression) { - /// Allow to update and persist values of system column - auto column = system_columns.tryGetByName(column_name); - if (column && !storage_columns.contains(column_name)) - storage_columns.emplace_back(column_name, column->type); + if (column_name == deleted_mask_column.name + && supports_lightweight_deletes + && !storage_columns_set.contains(deleted_mask_column.name)) + deleted_mask_updated = true; } } + if (command.type == MutationCommand::DELETE || command.type == MutationCommand::APPLY_DELETED_MASK) + has_delete_command = true; + /// If we don't have this column in source part, than we don't need to materialize it if (!part_columns.has(command.column_name)) - { continue; - } if (command.type == MutationCommand::DROP_COLUMN) - { removed_columns.insert(command.column_name); - } if (command.type == MutationCommand::RENAME_COLUMN) { @@ -302,6 +299,15 @@ getColumnsForNewDataPart( } } + if (!storage_columns_set.contains(deleted_mask_column.name)) + { + if (deleted_mask_updated || (part_columns.has(deleted_mask_column.name) && !has_delete_command)) + { + storage_columns.push_back(deleted_mask_column); + storage_columns_set.insert(deleted_mask_column.name); + } + } + SerializationInfoByName new_serialization_infos; for (const auto & [name, old_info] : serialization_infos) { @@ -873,6 +879,7 @@ void finalizeMutatedPart( /// All information about sizes is stored in checksums. /// It doesn't make sense to touch filesystem for sizes. 
new_data_part->setBytesOnDisk(new_data_part->checksums.getTotalSizeOnDisk()); + new_data_part->setBytesUncompressedOnDisk(new_data_part->checksums.getTotalSizeUncompressedOnDisk()); /// Also use information from checksums new_data_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); @@ -1530,7 +1537,8 @@ private: for (auto & command_for_interpreter : ctx->for_interpreter) { - if (command_for_interpreter.type == MutationCommand::DELETE) + if (command_for_interpreter.type == MutationCommand::DELETE + || command_for_interpreter.type == MutationCommand::APPLY_DELETED_MASK) { has_delete = true; break; @@ -1937,6 +1945,9 @@ static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, con return true; } + if (command.type == MutationCommand::APPLY_DELETED_MASK && !part->hasLightweightDelete()) + return true; + if (canSkipConversionToNullable(part, command)) return true; diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index a2765c071a2..333a0590d6b 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -23,6 +23,11 @@ #include #include +namespace ProfileEvents +{ + extern const Event ParallelReplicasUsedCount; +} + namespace DB { struct Part @@ -223,13 +228,16 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc void DefaultCoordinator::markReplicaAsUnavailable(size_t replica_number) { - LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); + if (stats[replica_number].is_unavailable == false) + { + LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); - ++unavailable_replicas_count; - stats[replica_number].is_unavailable = true; + stats[replica_number].is_unavailable = true; + ++unavailable_replicas_count; - if (sent_initial_requests == replicas_count - unavailable_replicas_count) - finalizeReadingState(); + if (sent_initial_requests == replicas_count - unavailable_replicas_count) + finalizeReadingState(); + } } void DefaultCoordinator::finalizeReadingState() @@ -405,12 +413,13 @@ public: template void InOrderCoordinator::markReplicaAsUnavailable(size_t replica_number) { - LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); + if (stats[replica_number].is_unavailable == false) + { + LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); - stats[replica_number].is_unavailable = true; - ++unavailable_replicas_count; - - /// There is nothing to do else. 
+ stats[replica_number].is_unavailable = true; + ++unavailable_replicas_count; + } } template @@ -569,7 +578,15 @@ ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelR initialize(); } - return pimpl->handleRequest(std::move(request)); + const auto replica_num = request.replica_num; + auto response = pimpl->handleRequest(std::move(request)); + if (!response.finish) + { + if (replicas_used.insert(replica_num).second) + ProfileEvents::increment(ProfileEvents::ParallelReplicasUsedCount); + } + + return response; } void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica_number) @@ -577,11 +594,9 @@ void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica std::lock_guard lock(mutex); if (!pimpl) - { - initialize(); - } - - return pimpl->markReplicaAsUnavailable(replica_number); + unavailable_nodes_registered_before_initialization.push_back(replica_number); + else + pimpl->markReplicaAsUnavailable(replica_number); } void ParallelReplicasReadingCoordinator::initialize() @@ -598,8 +613,12 @@ void ParallelReplicasReadingCoordinator::initialize() pimpl = std::make_unique>(replicas_count); break; } + if (progress_callback) pimpl->setProgressCallback(std::move(progress_callback)); + + for (const auto replica : unavailable_nodes_registered_before_initialization) + pimpl->markReplicaAsUnavailable(replica); } ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_) : replicas_count(replicas_count_) {} diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 449421797ce..acc265c124f 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -39,6 +39,12 @@ private: std::atomic initialized{false}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation + std::set replicas_used; + + /// To initialize `pimpl` we need to know the coordinator mode. We can know it only from initial announcement or regular request. + /// The problem is `markReplicaAsUnavailable` might be called before any of these requests happened. + /// In this case we will remember the numbers of unavailable replicas and apply this knowledge later on initialization. + std::vector unavailable_nodes_registered_before_initialization; }; using ParallelReplicasReadingCoordinatorPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 85f99e3f8c3..fc924d1f80c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -269,6 +269,12 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor deduplicate_by_columns = std::move(new_deduplicate_by_columns); } + else if (checkString("cleanup: ", in)) + { + /// Obsolete option, does nothing. 
+ bool cleanup = false; + in >> cleanup; + } else trailing_newline_found = true; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b6b3908701f..8b22c61e012 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -40,8 +40,6 @@ namespace ErrorCodes extern const int READONLY; extern const int UNKNOWN_STATUS_OF_INSERT; extern const int INSERT_WAS_DEDUPLICATED; - extern const int TIMEOUT_EXCEEDED; - extern const int NO_ACTIVE_REPLICAS; extern const int DUPLICATE_DATA_PART; extern const int PART_IS_TEMPORARILY_LOCKED; extern const int LOGICAL_ERROR; @@ -160,7 +158,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const size_t replicas_number = 0; - ZooKeeperRetriesControl quorum_retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement()); + const auto & settings = context->getSettingsRef(); + ZooKeeperRetriesControl quorum_retries_ctl( + "checkQuorumPrecondition", + log, + {settings.insert_keeper_max_retries, settings.insert_keeper_retry_initial_backoff_ms, settings.insert_keeper_retry_max_backoff_ms}, + context->getProcessListElement()); quorum_retries_ctl.retryLoop( [&]() { @@ -255,12 +258,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) auto block = getHeader().cloneWithColumns(chunk.detachColumns()); const auto & settings = context->getSettingsRef(); - zookeeper_retries_info = ZooKeeperRetriesInfo( - "ReplicatedMergeTreeSink::consume", - settings.insert_keeper_max_retries ? log : nullptr, - settings.insert_keeper_max_retries, - settings.insert_keeper_retry_initial_backoff_ms, - settings.insert_keeper_retry_max_backoff_ms); ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( settings.insert_keeper_fault_injection_probability, @@ -636,7 +633,12 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: CommitRetryContext retry_context; - ZooKeeperRetriesControl retries_ctl("commitPart", zookeeper_retries_info, context->getProcessListElement()); + const auto & settings = context->getSettingsRef(); + ZooKeeperRetriesControl retries_ctl( + "commitPart", + log, + {settings.insert_keeper_max_retries, settings.insert_keeper_retry_initial_backoff_ms, settings.insert_keeper_retry_max_backoff_ms}, + context->getProcessListElement()); auto resolve_duplicate_stage = [&] () -> CommitRetryContext::Stages { @@ -910,12 +912,8 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: part->name, multi_code, MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER); }); - /// Independently of how many retries we had left we want to do at least one check of this inner retry - /// so a) we try to verify at least once if metadata was written and b) we set the proper final error - /// (UNKNOWN_STATUS_OF_INSERT) if we fail to reconnect to keeper - new_retry_controller.requestUnconditionalRetry(); - bool node_exists = false; + /// The loop will be executed at least once new_retry_controller.retryLoop([&] { fiu_do_on(FailPoints::replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault, { zookeeper->forceFailureBeforeOperation(); }); @@ -1073,7 +1071,26 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: if (quorum_parallel) quorum_info.status_path = storage.zookeeper_path + "/quorum/parallel/" + retry_context.actual_part_name; - waitForQuorum(zookeeper, retry_context.actual_part_name, quorum_info.status_path, quorum_info.is_active_node_version, replicas_num); + ZooKeeperRetriesControl 
new_retry_controller = retries_ctl; + new_retry_controller.actionAfterLastFailedRetry([&] + { + /// We do not know whether or not data has been inserted in other replicas + new_retry_controller.setUserError( + ErrorCodes::UNKNOWN_STATUS_OF_INSERT, + "Unknown quorum status. The data was inserted in the local replica but we could not verify quorum. Reason: {}", + new_retry_controller.getLastKeeperErrorMessage()); + }); + + new_retry_controller.retryLoop([&]() + { + zookeeper->setKeeper(storage.getZooKeeper()); + waitForQuorum( + zookeeper, + retry_context.actual_part_name, + quorum_info.status_path, + quorum_info.is_active_node_version, + replicas_num); + }); } } @@ -1106,49 +1123,44 @@ void ReplicatedMergeTreeSinkImpl::waitForQuorum( /// We are waiting for quorum to be satisfied. LOG_TRACE(log, "Waiting for quorum '{}' for part {}{}", quorum_path, part_name, quorumLogMessage(replicas_num)); - try + fiu_do_on(FailPoints::replicated_merge_tree_insert_quorum_fail_0, { zookeeper->forceFailureBeforeOperation(); }); + + while (true) { - fiu_do_on(FailPoints::replicated_merge_tree_insert_quorum_fail_0, { zookeeper->forceFailureBeforeOperation(); }); + zkutil::EventPtr event = std::make_shared(); - while (true) - { - zkutil::EventPtr event = std::make_shared(); + std::string value; + /// `get` instead of `exists` so that `watch` does not leak if the node is no longer there. + if (!zookeeper->tryGet(quorum_path, value, nullptr, event)) + break; - std::string value; - /// `get` instead of `exists` so that `watch` does not leak if the node is no longer there. - if (!zookeeper->tryGet(quorum_path, value, nullptr, event)) - break; + LOG_TRACE(log, "Quorum node {} still exists, will wait for updates", quorum_path); - LOG_TRACE(log, "Quorum node {} still exists, will wait for updates", quorum_path); + ReplicatedMergeTreeQuorumEntry quorum_entry(value); - ReplicatedMergeTreeQuorumEntry quorum_entry(value); + /// If the node has time to disappear, and then appear again for the next insert. + if (quorum_entry.part_name != part_name) + break; - /// If the node has time to disappear, and then appear again for the next insert. - if (quorum_entry.part_name != part_name) - break; + if (!event->tryWait(quorum_timeout_ms)) + throw Exception( + ErrorCodes::UNKNOWN_STATUS_OF_INSERT, + "Unknown quorum status. The data was inserted in the local replica but we could not verify quorum. Reason: " + "Timeout while waiting for quorum"); - if (!event->tryWait(quorum_timeout_ms)) - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout while waiting for quorum"); - - LOG_TRACE(log, "Quorum {} for part {} updated, will check quorum node still exists", quorum_path, part_name); - } - - /// And what if it is possible that the current replica at this time has ceased to be active - /// and the quorum is marked as failed and deleted? - Coordination::Stat stat; - String value; - if (!zookeeper->tryGet(storage.replica_path + "/is_active", value, &stat) - || stat.version != is_active_node_version) - throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "Replica become inactive while waiting for quorum"); - } - catch (...) - { - /// We do not know whether or not data has been inserted - /// - whether other replicas have time to download the part and mark the quorum as done. - throw Exception(ErrorCodes::UNKNOWN_STATUS_OF_INSERT, "Unknown status, client must retry. 
Reason: {}", - getCurrentExceptionMessage(false)); + LOG_TRACE(log, "Quorum {} for part {} updated, will check quorum node still exists", quorum_path, part_name); } + /// And what if it is possible that the current replica at this time has ceased to be active + /// and the quorum is marked as failed and deleted? + Coordination::Stat stat; + String value; + if (!zookeeper->tryGet(storage.replica_path + "/is_active", value, &stat) || stat.version != is_active_node_version) + throw Exception( + ErrorCodes::UNKNOWN_STATUS_OF_INSERT, + "Unknown quorum status. The data was inserted in the local replica but we could not verify quorum. Reason: " + "Replica became inactive while waiting for quorum"); + LOG_TRACE(log, "Quorum '{}' for part {} satisfied", quorum_path, part_name); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index ded35aa015b..5c70d0c76e1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -74,7 +74,6 @@ private: using BlockIDsType = std::conditional_t, String>; - ZooKeeperRetriesInfo zookeeper_retries_info; struct QuorumInfo { String status_path; diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index e46c3f974c7..15874b8f675 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -5,6 +5,8 @@ #include #include +#include + namespace DB { @@ -15,29 +17,31 @@ namespace ErrorCodes struct ZooKeeperRetriesInfo { - ZooKeeperRetriesInfo() = default; - ZooKeeperRetriesInfo(std::string name_, Poco::Logger * logger_, UInt64 max_retries_, UInt64 initial_backoff_ms_, UInt64 max_backoff_ms_) - : name(std::move(name_)) - , logger(logger_) - , max_retries(max_retries_) - , curr_backoff_ms(std::min(initial_backoff_ms_, max_backoff_ms_)) - , max_backoff_ms(max_backoff_ms_) + ZooKeeperRetriesInfo(UInt64 max_retries_, UInt64 initial_backoff_ms_, UInt64 max_backoff_ms_) + : max_retries(max_retries_), initial_backoff_ms(std::min(initial_backoff_ms_, max_backoff_ms_)), max_backoff_ms(max_backoff_ms_) { } - std::string name; - Poco::Logger * logger = nullptr; - UInt64 max_retries = 0; - UInt64 curr_backoff_ms = 0; - UInt64 max_backoff_ms = 0; - UInt64 retry_count = 0; + UInt64 max_retries; + UInt64 initial_backoff_ms; + UInt64 max_backoff_ms; }; class ZooKeeperRetriesControl { public: - ZooKeeperRetriesControl(std::string name_, ZooKeeperRetriesInfo & retries_info_, QueryStatusPtr elem) - : name(std::move(name_)), retries_info(retries_info_), process_list_element(elem) + ZooKeeperRetriesControl(std::string name_, Poco::Logger * logger_, ZooKeeperRetriesInfo retries_info_, QueryStatusPtr elem) + : name(std::move(name_)), logger(logger_), retries_info(retries_info_), process_list_element(elem) + { + } + + ZooKeeperRetriesControl(const ZooKeeperRetriesControl & other) + : name(other.name) + , logger(other.logger) + , retries_info(other.retries_info) + , total_failures(other.total_failures) + , process_list_element(other.process_list_element) + , current_backoff_ms(other.current_backoff_ms) { } @@ -46,7 +50,7 @@ public: retryLoop(f, []() {}); } - /// retryLoop() executes f() until it succeeds/max_retries is reached/non-retrialable error is encountered + /// retryLoop() executes f() until it succeeds/max_retries is reached/non-retryable error is encountered /// /// the callable f() can provide feedback in terms of errors in two ways: /// 1. 
throw KeeperException exception: @@ -56,10 +60,17 @@ public: /// The idea is that if the caller has some semantics on top of non-hardware keeper errors, /// then it can provide feedback to retries controller via user errors /// + /// It is possible to use it multiple times (it will share nº of errors over the total amount of calls) + /// Each retryLoop is independent and it will execute f at least once void retryLoop(auto && f, auto && iteration_cleanup) { - while (canTry()) + current_iteration = 0; + current_backoff_ms = retries_info.initial_backoff_ms; + + while (current_iteration == 0 || canTry()) { + /// reset the flag, it will be set to false in case of error + iteration_succeeded = true; try { f(); @@ -79,6 +90,7 @@ public: iteration_cleanup(); throw; } + current_iteration++; } } @@ -102,13 +114,11 @@ public: void setUserError(std::exception_ptr exception, int code, std::string message) { - if (retries_info.logger) - LOG_TRACE( - retries_info.logger, "ZooKeeperRetriesControl: {}/{}: setUserError: error={} message={}", retries_info.name, name, code, message); + if (logger) + LOG_TRACE(logger, "ZooKeeperRetriesControl: {}: setUserError: error={} message={}", name, code, message); - /// if current iteration is already failed, keep initial error - if (!iteration_succeeded) - return; + if (iteration_succeeded) + total_failures++; iteration_succeeded = false; user_error.code = code; @@ -136,13 +146,11 @@ public: void setKeeperError(std::exception_ptr exception, Coordination::Error code, std::string message) { - if (retries_info.logger) - LOG_TRACE( - retries_info.logger, "ZooKeeperRetriesControl: {}/{}: setKeeperError: error={} message={}", retries_info.name, name, code, message); + if (logger) + LOG_TRACE(logger, "ZooKeeperRetriesControl: {}: setKeeperError: error={} message={}", name, code, message); - /// if current iteration is already failed, keep initial error - if (!iteration_succeeded) - return; + if (iteration_succeeded) + total_failures++; iteration_succeeded = false; keeper_error.code = code; @@ -170,17 +178,19 @@ public: void stopRetries() { stop_retries = true; } - void requestUnconditionalRetry() { unconditional_retry = true; } + bool isLastRetry() const { return total_failures >= retries_info.max_retries; } - bool isLastRetry() const { return retries_info.retry_count >= retries_info.max_retries; } + bool isRetry() const { return current_iteration > 1; } - bool isRetry() const { return retries_info.retry_count > 0; } - - Coordination::Error getLastKeeperErrorCode() const { return keeper_error.code; } + const std::string & getLastKeeperErrorMessage() const { return keeper_error.message; } /// action will be called only once and only after latest failed retry void actionAfterLastFailedRetry(std::function f) { action_after_last_failed_retry = std::move(f); } + const std::string & getName() const { return name; } + + Poco::Logger * getLogger() const { return logger; } + private: struct KeeperError { @@ -199,59 +209,42 @@ private: bool canTry() { - ++iteration_count; - /// first iteration is ordinary execution, no further checks needed - if (0 == iteration_count) - return true; - - if (process_list_element && !process_list_element->checkTimeLimitSoft()) - return false; - - if (unconditional_retry) - { - unconditional_retry = false; - return true; - } - - /// iteration succeeded -> no need to retry if (iteration_succeeded) { - /// avoid unnecessary logs, - print something only in case of retries - if (retries_info.logger && iteration_count > 1) + if (logger && total_failures > 0) 
LOG_DEBUG( - retries_info.logger, - "ZooKeeperRetriesControl: {}/{}: succeeded after: iterations={} total_retries={}", - retries_info.name, + logger, + "ZooKeeperRetriesControl: {}: succeeded after: Iterations={} Total keeper failures={}/{}", name, - iteration_count, - retries_info.retry_count); + current_iteration, + total_failures, + retries_info.max_retries); return false; } if (stop_retries) { - logLastError("stop retries on request"); action_after_last_failed_retry(); + logLastError("stop retries on request"); throwIfError(); return false; } - if (retries_info.retry_count >= retries_info.max_retries) + if (total_failures > retries_info.max_retries) { - logLastError("retry limit is reached"); action_after_last_failed_retry(); + logLastError("retry limit is reached"); throwIfError(); return false; } + if (process_list_element && !process_list_element->checkTimeLimitSoft()) + return false; + /// retries - ++retries_info.retry_count; logLastError("will retry due to error"); - sleepForMilliseconds(retries_info.curr_backoff_ms); - retries_info.curr_backoff_ms = std::min(retries_info.curr_backoff_ms * 2, retries_info.max_backoff_ms); - - /// reset the flag, it will be set to false in case of error - iteration_succeeded = true; + sleepForMilliseconds(current_backoff_ms); + current_backoff_ms = std::min(current_backoff_ms * 2, retries_info.max_backoff_ms); return true; } @@ -265,49 +258,52 @@ private: std::rethrow_exception(keeper_error.exception); } - void logLastError(std::string_view header) + void logLastError(const std::string_view & header) { + if (!logger) + return; if (user_error.code == ErrorCodes::OK) { - if (retries_info.logger) - LOG_DEBUG( - retries_info.logger, - "ZooKeeperRetriesControl: {}/{}: {}: retry_count={} timeout={}ms error={} message={}", - retries_info.name, - name, - header, - retries_info.retry_count, - retries_info.curr_backoff_ms, - keeper_error.code, - keeper_error.message); + LOG_DEBUG( + logger, + "ZooKeeperRetriesControl: {}: {}: retry_count={}/{} timeout={}ms error={} message={}", + name, + header, + current_iteration, + retries_info.max_retries, + current_backoff_ms, + keeper_error.code, + keeper_error.message); } else { - if (retries_info.logger) - LOG_DEBUG( - retries_info.logger, - "ZooKeeperRetriesControl: {}/{}: {}: retry_count={} timeout={}ms error={} message={}", - retries_info.name, - name, - header, - retries_info.retry_count, - retries_info.curr_backoff_ms, - user_error.code, - user_error.message); + LOG_DEBUG( + logger, + "ZooKeeperRetriesControl: {}: {}: retry_count={}/{} timeout={}ms error={} message={}", + name, + header, + current_iteration, + retries_info.max_retries, + current_backoff_ms, + user_error.code, + user_error.message); } } std::string name; - ZooKeeperRetriesInfo & retries_info; - Int64 iteration_count = -1; + Poco::Logger * logger = nullptr; + ZooKeeperRetriesInfo retries_info; + UInt64 total_failures = 0; UserError user_error; KeeperError keeper_error; std::function action_after_last_failed_retry = []() {}; - bool unconditional_retry = false; bool iteration_succeeded = true; bool stop_retries = false; QueryStatusPtr process_list_element; + + UInt64 current_iteration = 0; + UInt64 current_backoff_ms = 0; }; } diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 03200d0d9fa..36388a32b41 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -59,6 +59,15 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, } return res; } + else if (command->type == 
ASTAlterCommand::APPLY_DELETED_MASK) + { + MutationCommand res; + res.ast = command->ptr(); + res.type = APPLY_DELETED_MASK; + res.predicate = command->predicate; + res.partition = command->partition; + return res; + } else if (command->type == ASTAlterCommand::MATERIALIZE_INDEX) { MutationCommand res; diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 014a227dff3..6e10f7d9b2d 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -39,6 +39,7 @@ struct MutationCommand MATERIALIZE_TTL, RENAME_COLUMN, MATERIALIZE_COLUMN, + APPLY_DELETED_MASK, ALTER_WITHOUT_MUTATION, /// pure metadata command, currently unusned }; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 665e057b369..a928a4daf63 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1300,8 +1300,6 @@ void StorageDistributed::drop() disk->removeRecursive(relative_data_path); } - - LOG_DEBUG(log, "Removed"); } Strings StorageDistributed::getDataPaths() const @@ -1328,8 +1326,6 @@ void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, Co it->second.directory_queue->shutdownAndDropAllData(); it = cluster_nodes_data.erase(it); } - - LOG_DEBUG(log, "Removed"); } StoragePolicyPtr StorageDistributed::getStoragePolicy() const diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index bc4db8f1998..25bb6691ff6 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1935,7 +1935,7 @@ void StorageFile::parseFileSource(String source, String & filename, String & pat } std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos); - while (path_to_archive_view.back() == ' ') + while (path_to_archive_view.ends_with(' ')) path_to_archive_view.remove_suffix(1); if (path_to_archive_view.empty()) diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 6bf69efa1dd..87790dd2fdc 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -481,7 +481,11 @@ protected: { Columns columns; columns.reserve(block_header.columns()); - columns.emplace_back(createColumn()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } return {std::move(columns), block_size}; } @@ -719,6 +723,11 @@ void registerStorageFuzzJSON(StorageFactory & factory) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzJSON must have arguments."); StorageFuzzJSON::Configuration configuration = StorageFuzzJSON::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzJSON' supports only columns of String type, got {}.", col.type->getName()); + return std::make_shared(args.table_id, args.columns, args.comment, configuration); }); } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index cf9180be1ee..2339fd11cf8 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -460,6 +460,16 @@ std::optional StorageMaterializedView::totalBytes(const Settings & setti return {}; } +std::optional StorageMaterializedView::totalBytesUncompressed(const Settings & settings) const +{ + if (hasInnerTable()) + { + if (auto table = tryGetTargetTable()) + return 
table->totalBytesUncompressed(settings); + } + return {}; +} + ActionLock StorageMaterializedView::getActionLock(StorageActionBlockType type) { if (has_inner_table) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 03a6cba8cc6..9ddcc458f3e 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -102,6 +102,7 @@ public: std::optional totalRows(const Settings & settings) const override; std::optional totalBytes(const Settings & settings) const override; + std::optional totalBytesUncompressed(const Settings & settings) const override; private: /// Will be initialized in constructor diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index eb8c52f8936..9378aaa1f6a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -272,6 +272,15 @@ std::optional StorageMergeTree::totalBytes(const Settings &) const return getTotalActiveSizeInBytes(); } +std::optional StorageMergeTree::totalBytesUncompressed(const Settings &) const +{ + UInt64 res = 0; + auto parts = getDataPartsForInternalUsage(); + for (const auto & part : parts) + res += part->getBytesUncompressedOnDisk(); + return res; +} + SinkToStoragePtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) { @@ -280,12 +289,20 @@ StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & met *this, metadata_snapshot, settings.max_partitions_per_insert_block, local_context); } -void StorageMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const +void StorageMergeTree::checkTableCanBeDropped(ContextPtr query_context) const { if (!supportsReplication() && isStaticStorage()) return; auto table_id = getStorageID(); + + const auto & query_settings = query_context->getSettingsRef(); + if (query_settings.max_table_size_to_drop.changed) + { + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes(), query_settings.max_table_size_to_drop); + return; + } + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 863a4b91487..89de60ed819 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -70,6 +70,7 @@ public: std::optional totalRows(const Settings &) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo &, ContextPtr) const override; std::optional totalBytes(const Settings &) const override; + std::optional totalBytesUncompressed(const Settings &) const override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 7961c44e844..a97104a5a68 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -456,7 +456,7 @@ SinkToStoragePtr StoragePostgreSQL::write( return std::make_shared(metadata_snapshot, pool->get(), remote_table_name, remote_table_schema, on_conflict); } -StoragePostgreSQL::Configuration StoragePostgreSQL::processNamedCollectionResult(const NamedCollection & named_collection, bool require_table) +StoragePostgreSQL::Configuration StoragePostgreSQL::processNamedCollectionResult(const 
NamedCollection & named_collection, ContextPtr context_, bool require_table) { StoragePostgreSQL::Configuration configuration; ValidateKeysMultiset required_arguments = {"user", "username", "password", "database", "db"}; @@ -473,6 +473,12 @@ StoragePostgreSQL::Configuration StoragePostgreSQL::processNamedCollectionResult configuration.port = static_cast(named_collection.get("port")); configuration.addresses = {std::make_pair(configuration.host, configuration.port)}; } + else + { + size_t max_addresses = context_->getSettingsRef().glob_expansion_max_elements; + configuration.addresses = parseRemoteDescriptionForExternalDatabase( + configuration.addresses_expr, max_addresses, 5432); + } configuration.username = named_collection.getAny({"username", "user"}); configuration.password = named_collection.get("password"); @@ -490,7 +496,7 @@ StoragePostgreSQL::Configuration StoragePostgreSQL::getConfiguration(ASTs engine StoragePostgreSQL::Configuration configuration; if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context)) { - configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection); + configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context); } else { diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index fb8b5a22df2..725a935aa46 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -65,7 +65,7 @@ public: static Configuration getConfiguration(ASTs engine_args, ContextPtr context); - static Configuration processNamedCollectionResult(const NamedCollection & named_collection, bool require_table = true); + static Configuration processNamedCollectionResult(const NamedCollection & named_collection, ContextPtr context_, bool require_table = true); static ColumnsDescription getTableStructureFromData( const postgres::PoolWithFailoverPtr & pool_, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3e4d6309ec8..0089eeada00 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -265,7 +265,7 @@ String StorageReplicatedMergeTree::getEndpointName() const static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { - return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}); + return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); } static MergeTreePartInfo makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(const String & partition_id) @@ -2707,7 +2707,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) { String source_replica_path = fs::path(zookeeper_path) / "replicas" / part_desc->replica; ReplicatedMergeTreeAddress address(getZooKeeper()->get(fs::path(source_replica_path) / "host")); - auto timeouts = getFetchPartHTTPTimeouts(getContext()); + auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getContext()->getServerSettings(), getContext()->getSettingsRef()); auto credentials = getContext()->getInterserverCredentials(); String interserver_scheme = getContext()->getInterserverScheme(); @@ -4174,23 +4174,6 @@ void StorageReplicatedMergeTree::stopBeingLeader() is_leader = false; } -ConnectionTimeouts StorageReplicatedMergeTree::getFetchPartHTTPTimeouts(ContextPtr local_context) -{ - auto timeouts = getHTTPTimeouts(local_context); - auto 
settings = getSettings(); - - if (settings->replicated_fetches_http_connection_timeout.changed) - timeouts.connection_timeout = settings->replicated_fetches_http_connection_timeout; - - if (settings->replicated_fetches_http_send_timeout.changed) - timeouts.send_timeout = settings->replicated_fetches_http_send_timeout; - - if (settings->replicated_fetches_http_receive_timeout.changed) - timeouts.receive_timeout = settings->replicated_fetches_http_receive_timeout; - - return timeouts; -} - bool StorageReplicatedMergeTree::checkReplicaHavePart(const String & replica, const String & part_name) { auto zookeeper = getZooKeeper(); @@ -4795,7 +4778,7 @@ bool StorageReplicatedMergeTree::fetchPart( else { address.fromString(zookeeper->get(fs::path(source_replica_path) / "host")); - timeouts = getFetchPartHTTPTimeouts(getContext()); + timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getContext()->getServerSettings(), getContext()->getSettingsRef()); credentials = getContext()->getInterserverCredentials(); interserver_scheme = getContext()->getInterserverScheme(); @@ -5476,6 +5459,12 @@ std::optional StorageReplicatedMergeTree::totalBytes(const Settings & se return res; } +std::optional StorageReplicatedMergeTree::totalBytesUncompressed(const Settings & settings) const +{ + UInt64 res = 0; + foreachActiveParts([&res](auto & part) { res += part->getBytesUncompressedOnDisk(); }, settings.select_sequential_consistency); + return res; +} void StorageReplicatedMergeTree::assertNotReadonly() const { @@ -6440,9 +6429,17 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( } -void StorageReplicatedMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const +void StorageReplicatedMergeTree::checkTableCanBeDropped(ContextPtr query_context) const { auto table_id = getStorageID(); + + const auto & query_settings = query_context->getSettingsRef(); + if (query_settings.max_table_size_to_drop.changed) + { + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes(), query_settings.max_table_size_to_drop); + return; + } + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); } @@ -10261,7 +10258,7 @@ void StorageReplicatedMergeTree::backupData( bool exists = false; Strings mutation_ids; { - ZooKeeperRetriesControl retries_ctl("getMutations", zookeeper_retries_info, nullptr); + ZooKeeperRetriesControl retries_ctl("getMutations", log, zookeeper_retries_info, nullptr); retries_ctl.retryLoop([&]() { if (!zookeeper || zookeeper->expired()) @@ -10280,7 +10277,7 @@ void StorageReplicatedMergeTree::backupData( bool mutation_id_exists = false; String mutation; - ZooKeeperRetriesControl retries_ctl("getMutation", zookeeper_retries_info, nullptr); + ZooKeeperRetriesControl retries_ctl("getMutation", log, zookeeper_retries_info, nullptr); retries_ctl.retryLoop([&]() { if (!zookeeper || zookeeper->expired()) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index f68a7561b93..94ddaa753a5 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -165,6 +165,7 @@ public: std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context) const override; std::optional totalBytes(const Settings & settings) const override; + std::optional totalBytesUncompressed(const Settings & 
settings) const override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; @@ -757,10 +758,6 @@ private: int32_t alter_version, int32_t log_version); - /// Exchange parts. - - ConnectionTimeouts getFetchPartHTTPTimeouts(ContextPtr context); - /** Returns an empty string if no one has a part. */ String findReplicaHavingPart(const String & part_name, bool active); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 556b4f5655b..096e2e88f91 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -104,6 +104,7 @@ static const std::unordered_set optional_configuration_keys = "structure", "access_key_id", "secret_access_key", + "session_token", "filename", "use_environment_credentials", "max_single_read_retries", @@ -1460,7 +1461,7 @@ void StorageS3::Configuration::connect(ContextPtr context) client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); client = S3::ClientFactory::instance().create( client_configuration, url.is_virtual_hosted_style, @@ -1521,11 +1522,14 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// S3('url', NOSIGN, 'format') /// S3('url', NOSIGN, 'format', 'compression') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') /// with optional headers() function - if (engine_args.empty() || engine_args.size() > 5) + if (engine_args.empty() || engine_args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage S3 requires 1 to 5 arguments: " "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); @@ -1541,7 +1545,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context static std::unordered_map> size_to_engine_args { {1, {{}}}, - {5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression_method", 4}}} + {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}} }; std::unordered_map engine_args_to_idx; @@ -1577,7 +1581,8 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context else engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; } - /// For 4 arguments we support 2 possible variants: + /// For 4 arguments we support 3 possible variants: + /// - s3(source, access_key_id, secret_access_key, session_token) /// - s3(source, access_key_id, secret_access_key, format) /// - s3(source, NOSIGN, format, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
@@ -1590,7 +1595,32 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; } else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } + } + /// For 5 arguments we support 2 possible variants: + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, compression) + else if (engine_args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } } else { @@ -1612,6 +1642,10 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context if (engine_args_to_idx.contains("secret_access_key")) configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + if (engine_args_to_idx.contains("session_token")) + configuration.auth_settings.session_token = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); + + configuration.auth_settings.no_sign_request = no_sign_request; } diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index a22ba6586ac..34c092c7208 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -35,6 +35,7 @@ void StorageSnapshot::init() if (storage.hasLightweightDeletedMask()) system_columns[LightweightDeleteDescription::FILTER_COLUMN.name] = LightweightDeleteDescription::FILTER_COLUMN.type; + system_columns[BlockNumberColumn::name] = BlockNumberColumn::type; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index fd94a3cab06..d38d3486410 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -93,7 +93,7 @@ static bool urlWithGlobs(const String & uri) static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { - return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}); + return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); } IStorageURLBase::IStorageURLBase( diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 0ba8838d4c3..a569c50835c 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -142,7 +142,7 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMet local_context, ConnectionTimeouts::getHTTPTimeouts( local_context->getSettingsRef(), - {local_context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}), + local_context->getServerSettings().keep_alive_timeout), 
compression_method); } diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 46ab70ff04a..a5dd7ea6e0b 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -20,6 +20,7 @@ NamesAndTypesList StorageSystemBackups::getNamesAndTypes() NamesAndTypesList names_and_types{ {"id", std::make_shared()}, {"name", std::make_shared()}, + {"base_backup_name", std::make_shared()}, {"status", std::make_shared(getBackupStatusEnumValues())}, {"error", std::make_shared()}, {"start_time", std::make_shared()}, @@ -42,6 +43,7 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con size_t column_index = 0; auto & column_id = assert_cast(*res_columns[column_index++]); auto & column_name = assert_cast(*res_columns[column_index++]); + auto & column_base_backup_name = assert_cast(*res_columns[column_index++]); auto & column_status = assert_cast(*res_columns[column_index++]); auto & column_error = assert_cast(*res_columns[column_index++]); auto & column_start_time = assert_cast(*res_columns[column_index++]); @@ -59,6 +61,7 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con { column_id.insertData(info.id.data(), info.id.size()); column_name.insertData(info.name.data(), info.name.size()); + column_base_backup_name.insertData(info.base_backup_name.data(), info.base_backup_name.size()); column_status.insertValue(static_cast(info.status)); column_error.insertData(info.error_message.data(), info.error_message.size()); column_start_time.insertValue(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 1baaf5144e8..e02d4bf1733 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -1,4 +1,4 @@ -#ifdef OS_LINUX /// Because of 'sigqueue' functions and RT signals. +#ifdef OS_LINUX /// Because of 'rt_tgsigqueueinfo' functions and RT signals. #include #include @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -24,11 +25,16 @@ #include #include #include +#include #include +#include #include +#include +#include #include +#include #include - +#include namespace DB { @@ -48,8 +54,8 @@ namespace { // Initialized in StorageSystemStackTrace's ctor and used in signalHandler. -std::atomic expected_pid; -const int sig = SIGRTMIN; +std::atomic server_pid; +const int STACK_TRACE_SERVICE_SIGNAL = SIGRTMIN; std::atomic sequence_num = 0; /// For messages sent via pipe. std::atomic data_ready_num = 0; @@ -73,6 +79,11 @@ size_t query_id_size = 0; LazyPipeFDs notification_pipe; +int rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t *info) +{ + return static_cast(syscall(__NR_rt_tgsigqueueinfo, tgid, tid, sig, info)); +} + void signalHandler(int, siginfo_t * info, void * context) { DENY_ALLOCATIONS_IN_SCOPE; @@ -80,7 +91,7 @@ void signalHandler(int, siginfo_t * info, void * context) /// In case malicious user is sending signals manually (for unknown reason). /// If we don't check - it may break our synchronization. - if (info->si_pid != expected_pid) + if (info->si_pid != server_pid) return; /// Signal received too late. 
@@ -162,7 +173,7 @@ bool wait(int timeout_ms) } using ThreadIdToName = std::unordered_map>; -ThreadIdToName getFilteredThreadNames(ASTPtr query, ContextPtr context, const PaddedPODArray & thread_ids, Poco::Logger * log) +ThreadIdToName getFilteredThreadNames(const ActionsDAG::Node * predicate, ContextPtr context, const PaddedPODArray & thread_ids, Poco::Logger * log) { ThreadIdToName tid_to_name; MutableColumnPtr all_thread_names = ColumnString::create(); @@ -193,7 +204,7 @@ ThreadIdToName getFilteredThreadNames(ASTPtr query, ContextPtr context, const Pa LOG_TRACE(log, "Read {} thread names for {} threads, took {} ms", tid_to_name.size(), thread_ids.size(), watch.elapsedMilliseconds()); Block block { ColumnWithTypeAndName(std::move(all_thread_names), std::make_shared(), "thread_name") }; - VirtualColumnUtils::filterBlockWithQuery(query, block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); ColumnPtr thread_names = std::move(block.getByPosition(0).column); std::unordered_set filtered_thread_names; @@ -214,24 +225,69 @@ ThreadIdToName getFilteredThreadNames(ASTPtr query, ContextPtr context, const Pa return tid_to_name; } +bool parseHexNumber(std::string_view sv, UInt64 & res) +{ + errno = 0; /// Functions strto* don't clear errno. + char * pos_integer = const_cast(sv.begin()); + res = std::strtoull(sv.begin(), &pos_integer, 16); + return (pos_integer == sv.begin() + sv.size() && errno != ERANGE); +} +bool isSignalBlocked(UInt64 tid, int signal) +{ + String buffer; + + try + { + ReadBufferFromFile status(fmt::format("/proc/{}/status", tid)); + while (!status.eof()) + { + readEscapedStringUntilEOL(buffer, status); + if (!status.eof()) + ++status.position(); + if (buffer.starts_with("SigBlk:")) + break; + } + status.close(); + + std::string_view line(buffer); + line = line.substr(strlen("SigBlk:")); + line = line.substr(0, line.rend() - std::find_if_not(line.rbegin(), line.rend(), ::isspace)); + + UInt64 sig_blk; + if (parseHexNumber(line, sig_blk)) + return sig_blk & signal; + } + catch (const Exception & e) + { + /// Ignore TOCTOU error + if (e.code() != ErrorCodes::FILE_DOESNT_EXIST) + throw; + } + + return false; +} + /// Send a signal to every thread and wait for result. /// We must wait for every thread one by one sequentially, /// because there is a limit on number of queued signals in OS and otherwise signals may get lost. /// Also, non-RT signals are not delivered if previous signal is handled right now (by default; but we use RT signals). -class StorageSystemStackTraceSource : public ISource +class StackTraceSource : public ISource { public: - StorageSystemStackTraceSource(const Names & column_names, Block header_, const ASTPtr query_, ContextPtr context_, UInt64 max_block_size_, Poco::Logger * log_) + StackTraceSource(const Names & column_names, Block header_, ASTPtr && query_, ActionsDAGPtr && filter_dag_, ContextPtr context_, UInt64 max_block_size_, Poco::Logger * log_) : ISource(header_) , context(context_) , header(std::move(header_)) - , query(query_) + , query(std::move(query_)) + , filter_dag(std::move(filter_dag_)) + , predicate(filter_dag ? filter_dag->getOutputs().at(0) : nullptr) , max_block_size(max_block_size_) , pipe_read_timeout_ms(static_cast(context->getSettingsRef().storage_system_stack_trace_pipe_read_timeout_ms.totalMilliseconds())) , log(log_) , proc_it("/proc/self/task") /// It shouldn't be possible to do concurrent reads from this table. 
, lock(mutex) + , signal_str(strsignal(STACK_TRACE_SERVICE_SIGNAL)) /// NOLINT(concurrency-mt-unsafe) // not thread-safe but ok in this context { /// Create a mask of what columns are needed in the result. NameSet names_set(column_names.begin(), column_names.end()); @@ -257,9 +313,10 @@ protected: const auto & thread_ids_data = assert_cast(*thread_ids).getData(); + /// NOTE: This is racy, so you may get incorrect thread_name. ThreadIdToName thread_names; if (read_thread_names) - thread_names = getFilteredThreadNames(query, context, thread_ids_data, log); + thread_names = getFilteredThreadNames(predicate, context, thread_ids_data, log); for (UInt64 tid : thread_ids_data) { @@ -283,53 +340,71 @@ protected: } else { - ++signals_sent; - Stopwatch watch; - SCOPE_EXIT({ signals_sent_ms += watch.elapsedMilliseconds(); }); - - sigval sig_value{}; - - sig_value.sival_int = sequence_num.load(std::memory_order_acquire); - if (0 != ::sigqueue(static_cast(tid), sig, sig_value)) + /// NOTE: This check is racy (thread can be + /// destroyed/replaced/...), but it is OK, since only the + /// following could happen: + /// - it will incorrectly detect that the signal is blocked and + /// will not send it this time + /// - it will incorrectly detect that the signal is not blocked + /// then it will wait storage_system_stack_trace_pipe_read_timeout_ms + bool signal_blocked = isSignalBlocked(tid, STACK_TRACE_SERVICE_SIGNAL); + if (!signal_blocked) { - /// The thread may has been already finished. - if (ESRCH == errno) + ++signals_sent; + Stopwatch watch; + SCOPE_EXIT({ + signals_sent_ms += watch.elapsedMilliseconds(); + + /// Signed integer overflow is undefined behavior in both C and C++. However, according to + /// C++ standard, Atomic signed integer arithmetic is defined to use two's complement; there + /// are no undefined results. See https://en.cppreference.com/w/cpp/atomic/atomic and + /// http://eel.is/c++draft/atomics.types.generic#atomics.types.int-8 + ++sequence_num; + }); + + siginfo_t sig_info{}; + sig_info.si_code = SI_QUEUE; /// sigqueue() + sig_info.si_pid = server_pid; + sig_info.si_value.sival_int = sequence_num.load(std::memory_order_acquire); + + if (0 != rt_tgsigqueueinfo(server_pid, static_cast(tid), STACK_TRACE_SERVICE_SIGNAL, &sig_info)) + { + /// The thread may has been already finished. + if (ESRCH == errno) + continue; + + throw ErrnoException(ErrorCodes::CANNOT_SIGQUEUE, "Cannot queue a signal"); + } + + /// Just in case we will wait for pipe with timeout. In case signal didn't get processed. + if (wait(pipe_read_timeout_ms) && sig_info.si_value.sival_int == data_ready_num.load(std::memory_order_acquire)) + { + size_t stack_trace_size = stack_trace.getSize(); + size_t stack_trace_offset = stack_trace.getOffset(); + + Array arr; + arr.reserve(stack_trace_size - stack_trace_offset); + for (size_t i = stack_trace_offset; i < stack_trace_size; ++i) + arr.emplace_back(reinterpret_cast(stack_trace.getFramePointers()[i])); + + res_columns[res_index++]->insert(thread_name); + res_columns[res_index++]->insert(tid); + res_columns[res_index++]->insertData(query_id_data, query_id_size); + res_columns[res_index++]->insert(arr); + continue; - - throw ErrnoException(ErrorCodes::CANNOT_SIGQUEUE, "Cannot send signal with sigqueue"); + } } - /// Just in case we will wait for pipe with timeout. In case signal didn't get processed. 
- if (wait(pipe_read_timeout_ms) && sig_value.sival_int == data_ready_num.load(std::memory_order_acquire)) - { - size_t stack_trace_size = stack_trace.getSize(); - size_t stack_trace_offset = stack_trace.getOffset(); - - Array arr; - arr.reserve(stack_trace_size - stack_trace_offset); - for (size_t i = stack_trace_offset; i < stack_trace_size; ++i) - arr.emplace_back(reinterpret_cast(stack_trace.getFramePointers()[i])); - - res_columns[res_index++]->insert(thread_name); - res_columns[res_index++]->insert(tid); - res_columns[res_index++]->insertData(query_id_data, query_id_size); - res_columns[res_index++]->insert(arr); - } + if (signal_blocked) + LOG_DEBUG(log, "Thread {} ({}) blocks SIG{} signal", tid, thread_name, signal_str); else - { - LOG_DEBUG(log, "Cannot obtain a stack trace for thread {}", tid); + LOG_DEBUG(log, "Cannot obtain a stack trace for thread {} ({})", tid, thread_name); - res_columns[res_index++]->insert(thread_name); - res_columns[res_index++]->insert(tid); - res_columns[res_index++]->insertDefault(); - res_columns[res_index++]->insertDefault(); - } - - /// Signed integer overflow is undefined behavior in both C and C++. However, according to - /// C++ standard, Atomic signed integer arithmetic is defined to use two's complement; there - /// are no undefined results. See https://en.cppreference.com/w/cpp/atomic/atomic and - /// http://eel.is/c++draft/atomics.types.generic#atomics.types.int-8 - ++sequence_num; + res_columns[res_index++]->insert(thread_name); + res_columns[res_index++]->insert(tid); + res_columns[res_index++]->insertDefault(); + res_columns[res_index++]->insertDefault(); } } LOG_TRACE(log, "Send signal to {} threads (total), took {} ms", signals_sent, signals_sent_ms); @@ -343,6 +418,8 @@ private: ContextPtr context; Block header; const ASTPtr query; + const ActionsDAGPtr filter_dag; + const ActionsDAG::Node * predicate; const size_t max_block_size; const int pipe_read_timeout_ms; @@ -358,6 +435,7 @@ private: size_t signals_sent_ms = 0; std::unique_lock lock; + const char * signal_str; ColumnPtr getFilteredThreadIds() { @@ -372,11 +450,55 @@ private: } Block block { ColumnWithTypeAndName(std::move(all_thread_ids), std::make_shared(), "thread_id") }; - VirtualColumnUtils::filterBlockWithQuery(query, block, context); + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; } }; +class ReadFromSystemStackTrace : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemStackTrace"; } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + Pipe pipe(std::make_shared( + column_names, + getOutputStream().header, + std::move(query), + std::move(filter_actions_dag), + context, + max_block_size, + log)); + pipeline.init(std::move(pipe)); + } + + ReadFromSystemStackTrace( + const Names & column_names_, + Block sample_block, + ASTPtr && query_, + ContextPtr context_, + size_t max_block_size_, + Poco::Logger * log_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + , column_names(column_names_) + , query(query_) + , context(std::move(context_)) + , max_block_size(max_block_size_) + , log(log_) + { + } + +private: + Names column_names; + ASTPtr query; + ContextPtr context; + size_t max_block_size; + Poco::Logger * log; +}; + } @@ -396,7 +518,7 @@ 
StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) notification_pipe.open(); /// Setup signal handler. - expected_pid = getpid(); + server_pid = getpid(); struct sigaction sa{}; sa.sa_sigaction = signalHandler; sa.sa_flags = SA_SIGINFO; @@ -404,31 +526,35 @@ StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) if (sigemptyset(&sa.sa_mask)) throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Cannot set signal handler"); - if (sigaddset(&sa.sa_mask, sig)) + if (sigaddset(&sa.sa_mask, STACK_TRACE_SERVICE_SIGNAL)) throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Cannot set signal handler"); - if (sigaction(sig, &sa, nullptr)) + if (sigaction(STACK_TRACE_SERVICE_SIGNAL, &sa, nullptr)) throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); } -Pipe StorageSystemStackTrace::read( +void StorageSystemStackTrace::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, - const size_t max_block_size, - const size_t /*num_streams*/) + size_t max_block_size, + size_t /*num_streams*/) { storage_snapshot->check(column_names); - return Pipe(std::make_shared( + Block sample_block = storage_snapshot->metadata->getSampleBlock(); + + auto reading = std::make_unique( column_names, - storage_snapshot->metadata->getSampleBlock(), + sample_block, query_info.query->clone(), context, max_block_size, - log)); + log); + query_plan.addStep(std::move(reading)); } } diff --git a/src/Storages/System/StorageSystemStackTrace.h b/src/Storages/System/StorageSystemStackTrace.h index 9f15499ce90..18216cea1bd 100644 --- a/src/Storages/System/StorageSystemStackTrace.h +++ b/src/Storages/System/StorageSystemStackTrace.h @@ -25,14 +25,15 @@ public: String getName() const override { return "SystemStackTrace"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, - QueryProcessingStage::Enum processed_stage, + QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - size_t num_streams) override; + size_t /*num_streams*/) override; bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 7378ef85073..53b28543bf1 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -54,6 +54,7 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) {"storage_policy", std::make_shared()}, {"total_rows", std::make_shared(std::make_shared())}, {"total_bytes", std::make_shared(std::make_shared())}, + {"total_bytes_uncompressed", std::make_shared(std::make_shared())}, {"parts", std::make_shared(std::make_shared())}, {"active_parts", std::make_shared(std::make_shared())}, {"total_marks", std::make_shared(std::make_shared())}, @@ -518,6 +519,15 @@ protected: res_columns[res_index++]->insertDefault(); } + if (columns_mask[src_index++]) + { + auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); + if (total_bytes_uncompressed) + res_columns[res_index++]->insert(*total_bytes_uncompressed); + else + res_columns[res_index++]->insertDefault(); + } + auto table_merge_tree = std::dynamic_pointer_cast(table); if (columns_mask[src_index++]) { diff --git 
a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index b1746ea769f..ca6d40a05a3 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -159,7 +159,7 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr contex {}, ConnectionTimeouts::getHTTPTimeouts( context->getSettingsRef(), - {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}), + context->getServerSettings().keep_alive_timeout), credentials); std::string columns_info; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index e6ae75a5fd5..c52256fb984 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -71,7 +71,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (header_it != args.end()) args.erase(header_it); - if (args.empty() || args.size() > 6) + if (args.empty() || args.size() > 7) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); for (auto & arg : args) @@ -81,7 +81,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context static std::unordered_map> size_to_args { {1, {{}}}, - {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}} + {7, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}} }; std::unordered_map args_to_idx; @@ -118,11 +118,12 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; } - /// For 4 arguments we support 3 possible variants: + /// For 4 arguments we support 4 possible variants: /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, access_key_id, format) + /// - s3(source, access_key_id, access_key_id, format), + /// - s3(source, access_key_id, access_key_id, session_token) /// - s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. + /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. 
else if (args.size() == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); @@ -132,14 +133,28 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"format", 2}, {"structure", 3}}; } else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; + } else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } } - /// For 5 arguments we support 2 possible variants: + /// For 5 arguments we support 3 possible variants: /// - s3(source, access_key_id, access_key_id, format, structure) + /// - s3(source, access_key_id, access_key_id, session_token, format) /// - s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, + /// and by the 4-th argument, check if it's a format name or not else if (args.size() == 5) { auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); @@ -149,7 +164,33 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; } else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + // For 6 arguments we support 2 possible variants: + /// - s3(source, access_key_id, access_key_id, format, structure, compression_method) + /// - s3(source, access_key_id, access_key_id, session_token, format, structure) + /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; + } } else { @@ -181,6 +222,9 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (args_to_idx.contains("secret_access_key")) configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); + if (args_to_idx.contains("session_token")) + configuration.auth_settings.session_token = checkAndGetLiteralArgument(args[args_to_idx["session_token"]], 
"session_token"); + configuration.auth_settings.no_sign_request = no_sign_request; if (configuration.format == "auto") diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index fc384176007..fa73c1d313e 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -22,11 +22,15 @@ public: static constexpr auto signature = " - url\n" " - url, format\n" " - url, format, structure\n" - " - url, access_key_id, secret_access_key\n" " - url, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key\n" + " - url, access_key_id, secret_access_key, session_token\n" " - url, access_key_id, secret_access_key, format\n" + " - url, access_key_id, secret_access_key, session_token, format\n" " - url, access_key_id, secret_access_key, format, structure\n" + " - url, access_key_id, secret_access_key, session_token, format, structure\n" " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; static size_t getMaxNumberOfArguments() { return 6; } diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 4fe25079cf4..718b0d90de8 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -35,6 +35,7 @@ public: " - cluster, url, access_key_id, secret_access_key, format\n" " - cluster, url, access_key_id, secret_access_key, format, structure\n" " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; String getName() const override diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c3c6d9be6da..7de2d5a9fdd 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -19,9 +19,6 @@ endif() if (TARGET ch_contrib::rdkafka) set(USE_RDKAFKA 1) endif() -if (TARGET ch_rust::blake3) - set(USE_BLAKE3 1) -endif() if (TARGET ch_rust::skim) set(USE_SKIM 1) endif() @@ -103,6 +100,7 @@ endif() if (TARGET ch_contrib::llvm) set(USE_EMBEDDED_COMPILER ${ENABLE_EMBEDDED_COMPILER}) set(USE_DWARF_PARSER ${ENABLE_DWARF_PARSER}) + set(USE_BLAKE3 ${ENABLE_LIBRARIES}) endif() if (TARGET ch_contrib::unixodbc) set(USE_ODBC 1) @@ -131,6 +129,9 @@ endif() if (TARGET ch_contrib::sqids) set(USE_SQIDS 1) endif() +if (TARGET ch_contrib::idna) + set(USE_IDNA 1) +endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index b0e611fa77b..735094df78b 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -2,7 +2,6 @@ 00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation -01064_incremental_streaming_from_2_src_with_feedback 01083_expressions_in_engine_arguments 01155_rename_move_materialized_view 01214_test_storage_merge_aliases_with_where @@ -20,9 +19,7 @@ 01761_cast_to_enum_nullable 01925_join_materialized_columns 01925_test_storage_merge_aliases -01947_mv_subquery 01952_optimize_distributed_group_by_sharding_key -02139_MV_with_scalar_subquery 02174_cte_scalar_cache_mv 02352_grouby_shadows_arg 
02354_annoy @@ -36,7 +33,6 @@ 02404_memory_bound_merging 02725_agg_projection_resprect_PK 02763_row_policy_storage_merge_alias -02765_parallel_replicas_final_modifier 02784_parallel_replicas_automatic_decision_join 02818_parameterized_view_with_cte_multiple_usage 02815_range_dict_no_direct_join diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index f5181f4c843..27243aac4f1 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,7 +9,7 @@ import sys import time from ci_config import CI_CONFIG, BuildConfig -from ccache_utils import CargoCache +from cache_utils import CargoCache from env_helper import ( GITHUB_JOB_API_URL, diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 755217f89b5..d9925725eec 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -78,7 +78,7 @@ def main(): pr_info = PRInfo() commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, build_check_name) + atexit.register(update_mergeable_check, commit, pr_info, build_check_name) rerun_helper = RerunHelper(commit, build_check_name) if rerun_helper.is_already_finished_by_status(): diff --git a/tests/ci/ccache_utils.py b/tests/ci/cache_utils.py similarity index 63% rename from tests/ci/ccache_utils.py rename to tests/ci/cache_utils.py index 6ccaa8c80e0..0906b1d14e5 100644 --- a/tests/ci/ccache_utils.py +++ b/tests/ci/cache_utils.py @@ -5,12 +5,10 @@ import os import shutil from pathlib import Path -import requests # type: ignore - -from build_download_helper import download_build_with_progress, DownloadException -from compress_files import decompress_fast, compress_fast +from build_download_helper import DownloadException, download_build_with_progress +from compress_files import compress_fast, decompress_fast from digest_helper import digest_path -from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET +from env_helper import S3_BUILDS_BUCKET, S3_DOWNLOAD from git_helper import git_runner from s3_helper import S3Helper @@ -98,7 +96,67 @@ def upload_ccache( logging.info("Upload finished") -class CargoCache: +class CacheError(Exception): + pass + + +class Cache: + """a generic class for all caches""" + + def __init__( + self, + directory: Path, + temp_path: Path, + archive_name: str, + s3_helper: S3Helper, + ): + self.directory = directory + self.temp_path = temp_path + self.archive_name = archive_name + self.s3_helper = s3_helper + + def _download(self, url: str, ignore_error: bool = False) -> None: + compressed_cache = self.temp_path / self.archive_name + try: + download_build_with_progress(url, compressed_cache) + except DownloadException as e: + if not ignore_error: + raise CacheError(f"Failed to download {url}") from e + logging.warning("Unable downloading cache, creating empty directory") + self.directory.mkdir(parents=True, exist_ok=True) + return + + # decompress the cache and check if the necessary directory is there + self.directory.parent.mkdir(parents=True, exist_ok=True) + decompress_fast(compressed_cache, self.directory.parent) + if not self.directory.exists(): + if not ignore_error: + raise CacheError( + "The cache is downloaded and uncompressed, but directory " + f"{self.directory} does not exist" + ) + logging.warning( + "The cache archive was successfully downloaded and " + "decompressed, but %s does not exitst. 
Creating empty one", + self.directory, + ) + self.directory.mkdir(parents=True, exist_ok=True) + + def _upload(self, s3_path: str, force_upload: bool = False) -> None: + if not force_upload: + existing_cache = self.s3_helper.list_prefix_non_recursive(s3_path) + if existing_cache: + logging.info("Remote cache %s already exist, won't reupload", s3_path) + return + + logging.info("Compressing cargo cache") + archive_path = self.temp_path / self.archive_name + compress_fast(self.directory, archive_path) + logging.info("Uploading %s to S3 path %s", archive_path, s3_path) + self.s3_helper.upload_build_file_to_s3(archive_path, s3_path) + + +class CargoCache(Cache): PREFIX = "ccache/cargo_cache" def __init__( @@ -107,51 +165,49 @@ class CargoCache: temp_path: Path, s3_helper: S3Helper, ): - self._cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock" - self.lock_hash = digest_path(self._cargo_lock_file).hexdigest() - self.directory = directory - self.archive_name = f"Cargo_cache_{self.lock_hash}.tar.zst" - self.temp_path = temp_path - self.s3_helper = s3_helper - self._url = ( - f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{self.PREFIX}/{self.archive_name}" - ) + cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock" + self.lock_hash = digest_path(cargo_lock_file).hexdigest() self._force_upload_cache = False + super().__init__( + directory, temp_path, f"Cargo_cache_{self.lock_hash}.tar.zst", s3_helper + ) + self._url = self.s3_helper.get_url( + S3_BUILDS_BUCKET, f"{self.PREFIX}/{self.archive_name}" + ) def download(self): logging.info("Searching rust cache for Cargo.lock md5 %s", self.lock_hash) - compressed_cache = self.temp_path / self.archive_name try: - download_build_with_progress(self._url, compressed_cache) - except DownloadException: + self._download(self._url, False) + except CacheError: logging.warning("Unable downloading cargo cache, creating empty directory") + logging.info("Cache for Cargo.lock md5 %s will be uploaded", self.lock_hash) + self._force_upload_cache = True self.directory.mkdir(parents=True, exist_ok=True) return - # decompress the cache and check if the necessary directory is there - self.directory.parent.mkdir(parents=True, exist_ok=True) - decompress_fast(compressed_cache, self.directory.parent) - if not self.directory.exists(): - logging.warning( - "The cargo cache archive was successfully downloaded and " - "decompressed, but %s does not exitst. 
Creating empty one", - self.directory, - ) - logging.info("Cache for Cargo.lock md5 %s will be uploaded", self.lock_hash) - self.directory.mkdir(parents=True, exist_ok=True) + def upload(self): + self._upload(f"{self.PREFIX}/{self.archive_name}", self._force_upload_cache) + + +class GitHubCache(Cache): + PREFIX = "ccache/github" + + def __init__( + self, + directory: Path, + temp_path: Path, + s3_helper: S3Helper, + ): + self.force_upload = True + super().__init__(directory, temp_path, "GitHub.tar.zst", s3_helper) + self._url = self.s3_helper.get_url( + S3_BUILDS_BUCKET, f"{self.PREFIX}/{self.archive_name}" + ) + + def download(self): + logging.info("Searching cache for GitHub class") + self._download(self._url, True) def upload(self): - if not self._force_upload_cache: - cache_response = requests.head(self._url) - if cache_response.status_code == 200: - logging.info( - "Remote cargo cache %s already exist, won't reupload", self._url - ) - return - - logging.info("Compressing cargo cache") - archive_path = self.directory.parent / self.archive_name - compress_fast(self.directory, archive_path) - s3_path = f"{self.PREFIX}/{self.archive_name}" - logging.info("Uploading %s to S3 path %s", archive_path, s3_path) - self.s3_helper.upload_build_file_to_s3(archive_path, s3_path) + self._upload(f"{self.PREFIX}/{self.archive_name}", True) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index bed12d54fe3..701f66b9a6a 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -135,7 +135,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: "--skip-jobs", action="store_true", default=False, - help="skip fetching data about job runs, used in --configure action (for debugging)", + help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)", ) parser.add_argument( "--rebuild-all-docker", @@ -279,11 +279,11 @@ def _configure_docker_jobs( images_info = docker_images_helper.get_images_info() # a. 
check missing images - print("Start checking missing images in dockerhub") - # FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] - # find if it's possible to use the setting of /etc/docker/daemon.json - docker_images_helper.docker_login() if not rebuild_all_dockers: + # FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] + # find if it's possible to use the setting of /etc/docker/daemon.json + docker_images_helper.docker_login() + print("Start checking missing images in dockerhub") missing_multi_dict = check_missing_images_on_dockerhub(imagename_digest_dict) missing_multi = list(missing_multi_dict) missing_amd64 = [] @@ -305,6 +305,15 @@ def _configure_docker_jobs( "aarch64", ) ) + # FIXME: temporary hack, remove after transition to docker digest as tag + else: + if missing_multi: + print( + f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag" + ) + for image in missing_multi: + imagename_digest_dict[image] = "latest" + print("...checking missing images in dockerhub - done") else: # add all images to missing missing_multi = list(imagename_digest_dict) @@ -315,16 +324,7 @@ def _configure_docker_jobs( for name in imagename_digest_dict if not images_info[name]["only_amd64"] ] - # FIXME: temporary hack, remove after transition to docker digest as tag - if docker_digest_or_latest: - if missing_multi: - print( - f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag" - ) - for image in missing_multi: - imagename_digest_dict[image] = "latest" - print("...checking missing images in dockerhub - done") return { "images": imagename_digest_dict, "missing_aarch64": missing_aarch64, @@ -376,6 +376,9 @@ def _configure_jobs( if job_config.run_by_label in pr_labels: for batch in range(num_batches): # type: ignore batches_to_do.append(batch) + elif job_config.run_always: + # always add to todo + batches_to_do.append(batch) else: # this job controlled by digest, add to todo if it's not successfully done before for batch in range(num_batches): # type: ignore @@ -394,16 +397,31 @@ def _configure_jobs( else: jobs_to_skip += (job,) + if pr_labels: + jobs_requested_by_label = [] # type: List[str] + ci_controlling_labels = [] # type: List[str] + for label in pr_labels: + label_config = CI_CONFIG.get_label_config(label) + if label_config: + jobs_requested_by_label += label_config.run_jobs + ci_controlling_labels += [label] + if ci_controlling_labels: + print(f"NOTE: CI controlling labels are set: [{ci_controlling_labels}]") + print( + f" : following jobs will be executed: [{jobs_requested_by_label}]" + ) + jobs_to_do = jobs_requested_by_label + if commit_tokens: requested_jobs = [ token[len("#job_") :] for token in commit_tokens if token.startswith("#job_") ] - assert any( - len(x) > 1 for x in requested_jobs - ), f"Invalid job names requested [{requested_jobs}]" if requested_jobs: + assert any( + len(x) > 1 for x in requested_jobs + ), f"Invalid job names requested [{requested_jobs}]" jobs_to_do_requested = [] for job in requested_jobs: job_with_parents = CI_CONFIG.get_job_with_parents(job) @@ -413,7 +431,7 @@ def _configure_jobs( if parent in jobs_to_do and parent not in jobs_to_do_requested: jobs_to_do_requested.append(parent) print( - f"NOTE: Only specific job(s) were requested: [{jobs_to_do_requested}]" + f"NOTE: Only 
specific job(s) were requested by commit message tokens: [{jobs_to_do_requested}]" ) jobs_to_do = jobs_to_do_requested @@ -511,7 +529,14 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: def _fetch_commit_tokens(message: str) -> List[str]: pattern = r"#[\w-]+" matches = re.findall(pattern, message) - return matches + res = [ + match + for match in matches + if match == "#no-merge-commit" + or match.startswith("#job_") + or match.startswith("#job-") + ] + return res def main() -> int: @@ -539,14 +564,14 @@ def main() -> int: if args.configure: GR = GitRunner() - pr_info = PRInfo(need_changed_files=True) + pr_info = PRInfo() docker_data = {} git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD") # if '#no-merge-commit' is set in commit message - set git ref to PR branch head to avoid merge-commit tokens = [] - if pr_info.number != 0: + if pr_info.number != 0 and not args.skip_jobs: message = GR.run(f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1") tokens = _fetch_commit_tokens(message) print(f"Found commit message tokens: [{tokens}]") @@ -598,8 +623,10 @@ def main() -> int: result["jobs_data"] = jobs_data result["docker_data"] = docker_data if pr_info.number != 0 and not args.docker_digest_or_latest: + # FIXME: it runs style check before docker build if possible (style-check images is not changed) + # find a way to do style check always before docker build and others _check_and_update_for_early_style_check(result) - if pr_info.number != 0 and pr_info.has_changes_in_documentation_only(): + if pr_info.has_changes_in_documentation_only(): _update_config_for_docs_only(result) elif args.update_gh_statuses: @@ -680,7 +707,8 @@ def main() -> int: elif args.mark_success: assert indata, "Run config must be provided via --infile" job = args.job_name - num_batches = CI_CONFIG.get_job_config(job).num_batches + job_config = CI_CONFIG.get_job_config(job) + num_batches = job_config.num_batches assert ( num_batches <= 1 or 0 <= args.batch < num_batches ), f"--batch must be provided and in range [0, {num_batches}) for {job}" @@ -697,7 +725,7 @@ def main() -> int: if not CommitStatusData.is_present(): # apparently exit after rerun-helper check # do nothing, exit without failure - print("ERROR: no status file for job [{job}]") + print(f"ERROR: no status file for job [{job}]") job_status = CommitStatusData( status="dummy failure", description="dummy status", @@ -708,7 +736,9 @@ def main() -> int: job_status = CommitStatusData.load_status() # Storing job data (report_url) to restore OK GH status on job results reuse - if job_status.is_ok(): + if job_config.run_always: + print(f"Job [{job}] runs always in CI - do not mark as done") + elif job_status.is_ok(): success_flag_name = get_file_flag_name( job, indata["jobs_data"]["digests"][job], args.batch, num_batches ) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index f76aedac80b..031ab0be8a0 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,12 +1,18 @@ #!/usr/bin/env python3 +from enum import Enum import logging - from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from dataclasses import dataclass, field from pathlib import Path from typing import Callable, Dict, Iterable, List, Literal, Optional, Union +from integration_test_images import IMAGES + + +class Labels(Enum): + DO_NOT_TEST_LABEL = "do not test" + @dataclass class DigestConfig: @@ -22,6 +28,15 @@ class DigestConfig: git_submodules: bool = False +@dataclass +class LabelConfig: + """ + class to configure different CI scenarious per GH label + """ + 
+ run_jobs: Iterable[str] = frozenset() + + @dataclass class JobConfig: """ @@ -37,6 +52,7 @@ class JobConfig: timeout: Optional[int] = None num_batches: int = 1 run_by_label: str = "" + run_always: bool = False @dataclass @@ -94,7 +110,7 @@ class TestConfig: BuildConfigs = Dict[str, BuildConfig] BuildsReportConfig = Dict[str, BuildReportConfig] TestConfigs = Dict[str, TestConfig] - +LabelConfigs = Dict[str, LabelConfig] # common digests configs compatibility_check_digest = DigestConfig( @@ -130,27 +146,13 @@ upgrade_check_digest = DigestConfig( integration_check_digest = DigestConfig( include_paths=["./tests/ci/integration_test_check.py", "./tests/integration"], exclude_files=[".md"], - docker=[ - "clickhouse/dotnet-client", - "clickhouse/integration-helper", - "clickhouse/integration-test", - "clickhouse/integration-tests-runner", - "clickhouse/kerberized-hadoop", - "clickhouse/kerberos-kdc", - "clickhouse/mysql-golang-client", - "clickhouse/mysql-java-client", - "clickhouse/mysql-js-client", - "clickhouse/mysql-php-client", - "clickhouse/nginx-dav", - "clickhouse/postgresql-java-client", - ], + docker=IMAGES.copy(), ) -# FIXME: which tests are AST_FUZZER_TEST? just python? -# FIXME: should ast fuzzer test be non-skipable? + ast_fuzzer_check_digest = DigestConfig( - include_paths=["./tests/ci/ast_fuzzer_check.py"], - exclude_files=[".md"], - docker=["clickhouse/fuzzer"], + # include_paths=["./tests/ci/ast_fuzzer_check.py"], + # exclude_files=[".md"], + # docker=["clickhouse/fuzzer"], ) unit_check_digest = DigestConfig( include_paths=["./tests/ci/unit_tests_check.py"], @@ -166,9 +168,9 @@ perf_check_digest = DigestConfig( docker=["clickhouse/performance-comparison"], ) sqllancer_check_digest = DigestConfig( - include_paths=["./tests/ci/sqlancer_check.py"], - exclude_files=[".md"], - docker=["clickhouse/sqlancer-test"], + # include_paths=["./tests/ci/sqlancer_check.py"], + # exclude_files=[".md"], + # docker=["clickhouse/sqlancer-test"], ) sqllogic_check_digest = DigestConfig( include_paths=["./tests/ci/sqllogic_test.py"], @@ -188,20 +190,9 @@ bugfix_validate_check = DigestConfig( "./tests/ci/bugfix_validate_check.py", ], exclude_files=[".md"], - docker=[ + docker=IMAGES.copy() + + [ "clickhouse/stateless-test", - "clickhouse/dotnet-client", - "clickhouse/integration-helper", - "clickhouse/integration-test", - "clickhouse/integration-tests-runner", - "clickhouse/kerberized-hadoop", - "clickhouse/kerberos-kdc", - "clickhouse/mysql-golang-client", - "clickhouse/mysql-java-client", - "clickhouse/mysql-js-client", - "clickhouse/mysql-php-client", - "clickhouse/nginx-dav", - "clickhouse/postgresql-java-client", ], ) # common test params @@ -226,6 +217,7 @@ upgrade_test_common_params = { astfuzzer_test_common_params = { "digest": ast_fuzzer_check_digest, "run_command": "ast_fuzzer_check.py", + "run_always": True, } integration_test_common_params = { "digest": integration_check_digest, @@ -242,6 +234,7 @@ perf_test_common_params = { sqllancer_test_common_params = { "digest": sqllancer_check_digest, "run_command": "sqlancer_check.py", + "run_always": True, } sqllogic_test_params = { "digest": sqllogic_check_digest, @@ -266,6 +259,13 @@ class CiConfig: builds_report_config: BuildsReportConfig test_configs: TestConfigs other_jobs_configs: TestConfigs + label_configs: LabelConfigs + + def get_label_config(self, label_name: str) -> Optional[LabelConfig]: + for label, config in self.label_configs.items(): + if label_name == label: + return config + return None def get_job_config(self, check_name: str) 
-> JobConfig: res = None @@ -415,6 +415,9 @@ class CiConfig: CI_CONFIG = CiConfig( + label_configs={ + Labels.DO_NOT_TEST_LABEL.value: LabelConfig(run_jobs=["Style check"]), + }, build_config={ "package_release": BuildConfig( name="package_release", @@ -609,9 +612,7 @@ CI_CONFIG = CiConfig( "Style check": TestConfig( "", job_config=JobConfig( - digest=DigestConfig( - include_paths=["."], exclude_dirs=[".git", "__pycache__"] - ) + run_always=True, ), ), "tests bugfix validate check": TestConfig( @@ -847,6 +848,7 @@ CI_CONFIG.validate() # checks required by Mergeable Check REQUIRED_CHECKS = [ + "PR Check", "ClickHouse build check", "ClickHouse special build check", "Docs Check", diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 26a826a19ad..f9fadae4e03 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -133,7 +133,7 @@ def main(): pr_info = PRInfo() commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, check_name) + atexit.register(update_mergeable_check, commit, pr_info, check_name) rerun_helper = RerunHelper(commit, check_name) if rerun_helper.is_already_finished_by_status(): diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 2eac974858c..598eef9922e 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -15,11 +15,10 @@ from github.CommitStatus import CommitStatus from github.GithubException import GithubException from github.GithubObject import NotSet from github.IssueComment import IssueComment -from github.PullRequest import PullRequest from github.Repository import Repository from ci_config import CI_CONFIG, REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription -from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL, TEMP_PATH +from env_helper import GITHUB_JOB_URL, GITHUB_REPOSITORY, TEMP_PATH from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL from report import ( ERROR, @@ -437,11 +436,11 @@ def set_mergeable_check( context=MERGEABLE_NAME, description=description, state=state, - target_url=GITHUB_RUN_URL, + target_url=GITHUB_JOB_URL(), ) -def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None: +def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None: not_run = ( pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"}) or check_name not in REQUIRED_CHECKS @@ -454,7 +453,6 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None logging.info("Update Mergeable Check by %s", check_name) - commit = get_commit(gh, pr_info.sha) statuses = get_commit_filtered_statuses(commit) required_checks = [ @@ -475,14 +473,17 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None else: fail.append(status.context) + state: StatusType = SUCCESS + + if success: + description = ", ".join(success) + else: + description = "awaiting job statuses" + if fail: description = "failed: " + ", ".join(fail) - description = format_description(description) - if mergeable_status is None or mergeable_status.description != description: - set_mergeable_check(commit, description, FAILURE) - return - - description = ", ".join(success) + state = FAILURE description = format_description(description) + if mergeable_status is None or mergeable_status.description != description: - set_mergeable_check(commit, description) + set_mergeable_check(commit, description, state) diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index 6f68918e63c..1424ab8895d 
100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -67,7 +67,7 @@ def main(): if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - atexit.register(update_mergeable_check, gh, pr_info, NAME) + atexit.register(update_mergeable_check, commit, pr_info, NAME) if not pr_info.has_changes_in_documentation() and not args.force: logging.info("No changes in documentation") diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 1ce6ab617ec..093537fdeb0 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -124,7 +124,7 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, NAME) + atexit.register(update_mergeable_check, commit, pr_info, NAME) rerun_helper = RerunHelper(commit, NAME) if rerun_helper.is_already_finished_by_status(): diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 92d2fddef0f..6c615817164 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -18,9 +18,9 @@ def main(): pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) - # Update the Mergeable Check at the final step - update_mergeable_check(gh, pr_info, CI_STATUS_NAME) commit = get_commit(gh, pr_info.sha) + # Update the Mergeable Check at the final step + update_mergeable_check(commit, pr_info, CI_STATUS_NAME) statuses = [ status diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index c7ae91d88b2..4d81161b6de 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -254,7 +254,7 @@ def main(): ) commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, check_name) + atexit.register(update_mergeable_check, commit, pr_info, check_name) if validate_bugfix_check and "pr-bugfix" not in pr_info.labels: if args.post_commit_status == "file": diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index 15ee7dc9620..ae1eaf4c06a 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -1,9 +1,10 @@ #!/usr/bin/env python """Helper for GitHub API requests""" import logging +import re from datetime import date, datetime, timedelta -from pathlib import Path from os import path as p +from pathlib import Path from time import sleep from typing import List, Optional, Tuple, Union @@ -143,7 +144,9 @@ class GitHub(github.Github): def get_pull_cached( self, repo: Repository, number: int, obj_updated_at: Optional[datetime] = None ) -> PullRequest: - cache_file = self.cache_path / f"pr-{number}.pickle" + # clean any special symbol from the repo name, especially '/' + repo_name = re.sub(r"\W", "_", repo.full_name) + cache_file = self.cache_path / f"pr-{repo_name}-{number}.pickle" if cache_file.is_file(): is_updated, cached_pr = self._is_cache_updated(cache_file, obj_updated_at) @@ -192,6 +195,32 @@ class GitHub(github.Github): with open(path, "rb") as ob_fd: return self.load(ob_fd) # type: ignore + # pylint: disable=protected-access + @staticmethod + def toggle_pr_draft(pr: PullRequest) -> None: + """GH rest API does not provide a way to toggle the draft status for PR""" + node_id = pr._rawData["node_id"] + if pr.draft: + action = ( + "mutation PullRequestReadyForReview($input:MarkPullRequestReadyForReviewInput!)" + "{markPullRequestReadyForReview(input: $input){pullRequest{id}}}" + ) + else: + action = ( + 
"mutation ConvertPullRequestToDraft($input:ConvertPullRequestToDraftInput!)" + "{convertPullRequestToDraft(input: $input){pullRequest{id}}}" + ) + query = { + "query": action, + "variables": {"input": {"pullRequestId": node_id}}, + } + url = f"{pr._requester.base_url}/graphql" + _, data = pr._requester.requestJsonAndCheck("POST", url, input=query) + if data.get("data"): + pr._draft = pr._makeBoolAttribute(not pr.draft) + + # pylint: enable=protected-access + def _is_cache_updated( self, cache_file: Path, obj_updated_at: Optional[datetime] ) -> Tuple[bool, object]: diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index b8cfa0acd3d..5ef65f3f38b 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -279,7 +279,7 @@ def main(): if CI: gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, args.check_name) + atexit.register(update_mergeable_check, commit, pr_info, args.check_name) rerun_helper = RerunHelper(commit, args.check_name) if rerun_helper.is_already_finished_by_status(): diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 0a8f166e53e..c65f162f770 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -10,13 +10,8 @@ import sys from pathlib import Path from typing import Dict, List, Tuple -from github import Github - from build_download_helper import download_all_deb_packages -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import ( RerunHelper, get_commit, @@ -24,10 +19,12 @@ from commit_status_helper import ( post_commit_status, post_commit_status_to_file, ) -from docker_images_helper import DockerImage, pull_image, get_docker_image +from docker_images_helper import DockerImage, get_docker_image, pull_image from download_release_packages import download_last_release -from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY +from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH from get_robot_token import get_best_robot_token +from github_helper import GitHub +from integration_test_images import IMAGES from pr_info import PRInfo from report import ERROR, TestResult, TestResults, read_test_results from s3_helper import S3Helper @@ -36,24 +33,6 @@ from tee_popen import TeePopen from upload_result_helper import upload_results -# When update, update -# tests/integration/ci-runner.py:ClickhouseIntegrationTestsRunner.get_images_names too -IMAGES = [ - "clickhouse/dotnet-client", - "clickhouse/integration-helper", - "clickhouse/integration-test", - "clickhouse/integration-tests-runner", - "clickhouse/kerberized-hadoop", - "clickhouse/kerberos-kdc", - "clickhouse/mysql-golang-client", - "clickhouse/mysql-java-client", - "clickhouse/mysql-js-client", - "clickhouse/mysql-php-client", - "clickhouse/nginx-dav", - "clickhouse/postgresql-java-client", -] - - def get_json_params_dict( check_name: str, pr_info: PRInfo, @@ -210,7 +189,7 @@ def main(): logging.info("Skipping '%s' (no pr-bugfix in '%s')", check_name, pr_info.labels) sys.exit(0) - gh = Github(get_best_robot_token(), per_page=100) + gh = GitHub(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) rerun_helper = RerunHelper(commit, check_name_with_group) diff --git a/tests/ci/integration_test_images.py b/tests/ci/integration_test_images.py new file mode 100644 index 
00000000000..8148ac61181 --- /dev/null +++ b/tests/ci/integration_test_images.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +IMAGES_ENV = { + "clickhouse/dotnet-client": "DOCKER_DOTNET_CLIENT_TAG", + "clickhouse/integration-helper": "DOCKER_HELPER_TAG", + "clickhouse/integration-test": "DOCKER_BASE_TAG", + "clickhouse/integration-tests-runner": "", + "clickhouse/kerberized-hadoop": "DOCKER_KERBERIZED_HADOOP_TAG", + "clickhouse/kerberos-kdc": "DOCKER_KERBEROS_KDC_TAG", + "clickhouse/mysql-golang-client": "DOCKER_MYSQL_GOLANG_CLIENT_TAG", + "clickhouse/mysql-java-client": "DOCKER_MYSQL_JAVA_CLIENT_TAG", + "clickhouse/mysql-js-client": "DOCKER_MYSQL_JS_CLIENT_TAG", + "clickhouse/mysql-php-client": "DOCKER_MYSQL_PHP_CLIENT_TAG", + "clickhouse/nginx-dav": "DOCKER_NGINX_DAV_TAG", + "clickhouse/postgresql-java-client": "DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", + "clickhouse/python-bottle": "DOCKER_PYTHON_BOTTLE_TAG", +} + +IMAGES = list(IMAGES_ENV.keys()) + + +def get_image_env(image: str) -> str: + return IMAGES_ENV.get(image, "") + + +def get_docker_env(image: str, tag: str) -> str: + "if image belongs to IMAGES_ENV, return `-e` argument for docker command" + env = get_image_env(image) + if not env: + return env + return f"-e {env}={tag} " diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 58e78d571c5..6de0614541a 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -118,7 +118,7 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) pr_info = PRInfo() commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, check_name) + atexit.register(update_mergeable_check, commit, pr_info, check_name) temp_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index c023ca048d6..15558c81c7e 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,7 +2,7 @@ import json import logging import os -from typing import Dict, List, Set, Union, Literal +from typing import Dict, List, Set, Union from unidiff import PatchSet # type: ignore @@ -93,6 +93,7 @@ class PRInfo: github_event = PRInfo.default_event.copy() self.event = github_event self.changed_files = set() # type: Set[str] + self.changed_files_requested = False self.body = "" self.diff_urls = [] # type: List[str] # release_pr and merged_pr are used for docker images additional cache @@ -285,6 +286,7 @@ class PRInfo: response.raise_for_status() diff_object = PatchSet(response.text) self.changed_files.update({f.path for f in diff_object}) + self.changed_files_requested = True print(f"Fetched info about {len(self.changed_files)} changed files") def get_dict(self): @@ -297,9 +299,10 @@ class PRInfo: } def has_changes_in_documentation(self) -> bool: - # If the list wasn't built yet the best we can do is to - # assume that there were changes. 
- if self.changed_files is None or not self.changed_files: + if not self.changed_files_requested: + self.fetch_changed_files() + + if not self.changed_files: return True for f in self.changed_files: @@ -316,7 +319,11 @@ class PRInfo: checks if changes are docs related without other changes FIXME: avoid hardcoding filenames here """ + if not self.changed_files_requested: + self.fetch_changed_files() + if not self.changed_files: + # if no changes at all return False return False for f in self.changed_files: @@ -332,7 +339,10 @@ class PRInfo: return True def has_changes_in_submodules(self): - if self.changed_files is None or not self.changed_files: + if not self.changed_files_requested: + self.fetch_changed_files() + + if not self.changed_files: return True for f in self.changed_files: @@ -340,75 +350,6 @@ class PRInfo: return True return False - def can_skip_builds_and_use_version_from_master(self): - if FORCE_TESTS_LABEL in self.labels: - return False - - if self.changed_files is None or not self.changed_files: - return False - - return not any( - f.startswith("programs") - or f.startswith("src") - or f.startswith("base") - or f.startswith("cmake") - or f.startswith("rust") - or f == "CMakeLists.txt" - or f == "tests/ci/build_check.py" - for f in self.changed_files - ) - - def can_skip_integration_tests(self, versions: List[str]) -> bool: - if FORCE_TESTS_LABEL in self.labels: - return False - - # If docker image(s) relevant to integration tests are updated - if any(self.sha in version for version in versions): - return False - - if self.changed_files is None or not self.changed_files: - return False - - if not self.can_skip_builds_and_use_version_from_master(): - return False - - # Integration tests can be skipped if integration tests are not changed - return not any( - f.startswith("tests/integration/") - or f == "tests/ci/integration_test_check.py" - for f in self.changed_files - ) - - def can_skip_functional_tests( - self, version: str, test_type: Literal["stateless", "stateful"] - ) -> bool: - if FORCE_TESTS_LABEL in self.labels: - return False - - # If docker image(s) relevant to functional tests are updated - if self.sha in version: - return False - - if self.changed_files is None or not self.changed_files: - return False - - if not self.can_skip_builds_and_use_version_from_master(): - return False - - # Functional tests can be skipped if queries tests are not changed - if test_type == "stateless": - return not any( - f.startswith("tests/queries/0_stateless") - or f == "tests/ci/functional_test_check.py" - for f in self.changed_files - ) - else: # stateful - return not any( - f.startswith("tests/queries/1_stateful") - or f == "tests/ci/functional_test_check.py" - for f in self.changed_files - ) - class FakePRInfo: def __init__(self): diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index d1949a4da0b..108aa7d1946 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import atexit import sys import logging from typing import Tuple @@ -13,9 +14,8 @@ from commit_status_helper import ( post_commit_status, post_labels, remove_labels, - set_mergeable_check, + update_mergeable_check, ) -from docs_check import NAME as DOCS_NAME from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo @@ -24,6 +24,7 @@ from lambda_shared_package.lambda_shared.pr import ( TRUSTED_CONTRIBUTORS, check_pr_description, ) +from report import FAILURE 
TRUSTED_ORG_IDS = { 54801242, # clickhouse @@ -31,9 +32,9 @@ TRUSTED_ORG_IDS = { OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"} CAN_BE_TESTED_LABEL = "can be tested" -DO_NOT_TEST_LABEL = "do not test" FEATURE_LABEL = "pr-feature" SUBMODULE_CHANGED_LABEL = "submodule changed" +PR_CHECK = "PR Check" def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): @@ -58,24 +59,16 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): # Returns whether we should look into individual checks for this PR. If not, it # can be skipped entirely. -# Returns can_run, description, labels_state -def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]: +# Returns can_run, description +def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str]: # Consider the labels and whether the user is trusted. print("Got labels", pr_info.labels) if FORCE_TESTS_LABEL in pr_info.labels: print(f"Label '{FORCE_TESTS_LABEL}' set, forcing remaining checks") - return True, f"Labeled '{FORCE_TESTS_LABEL}'", "pending" - - if DO_NOT_TEST_LABEL in pr_info.labels: - print(f"Label '{DO_NOT_TEST_LABEL}' set, skipping remaining checks") - return False, f"Labeled '{DO_NOT_TEST_LABEL}'", "success" + return True, f"Labeled '{FORCE_TESTS_LABEL}'" if OK_SKIP_LABELS.intersection(pr_info.labels): - return ( - True, - "Don't try new checks for release/backports/cherry-picks", - "success", - ) + return True, "Don't try new checks for release/backports/cherry-picks" if CAN_BE_TESTED_LABEL not in pr_info.labels and not pr_is_by_trusted_user( pr_info.user_login, pr_info.user_orgs @@ -83,9 +76,9 @@ def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]: print( f"PRs by untrusted users need the '{CAN_BE_TESTED_LABEL}' label - please contact a member of the core team" ) - return False, "Needs 'can be tested' label", "failure" + return False, "Needs 'can be tested' label" - return True, "No special conditions apply", "pending" + return True, "No special conditions apply" def main(): @@ -98,7 +91,7 @@ def main(): print("::notice ::Cannot run, no PR exists for the commit") sys.exit(1) - can_run, description, labels_state = should_run_ci_for_pr(pr_info) + can_run, description = should_run_ci_for_pr(pr_info) if can_run and OK_SKIP_LABELS.intersection(pr_info.labels): print("::notice :: Early finish the check, running in a special PR") sys.exit(0) @@ -106,6 +99,7 @@ def main(): description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) + atexit.register(update_mergeable_check, commit, pr_info, PR_CHECK) description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) pr_labels_to_add = [] @@ -136,22 +130,6 @@ def main(): if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - # FIXME: it should rather be in finish check. no reason to stop ci run. 
- if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation(): - print( - f"The '{FEATURE_LABEL}' in the labels, " - "but there's no changed documentation" - ) - post_commit_status( # do not pass pr_info here intentionally - commit, - "failure", - "", - f"expect adding docs for {FEATURE_LABEL}", - DOCS_NAME, - pr_info, - ) - sys.exit(0) - if description_error: print( "::error ::Cannot run, PR description does not match the template: " @@ -171,34 +149,40 @@ def main(): "failure", url, format_description(description_error), - CI_STATUS_NAME, + PR_CHECK, pr_info, ) sys.exit(1) - set_mergeable_check(commit, "skipped") - ci_report_url = create_ci_report(pr_info, []) + if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation(): + print( + f"The '{FEATURE_LABEL}' in the labels, " + "but there's no changed documentation" + ) + post_commit_status( + commit, + FAILURE, + "", + f"expect adding docs for {FEATURE_LABEL}", + PR_CHECK, + pr_info, + ) + # allow the workflow to continue + if not can_run: print("::notice ::Cannot run") - post_commit_status( - commit, - labels_state, - ci_report_url, - description, - CI_STATUS_NAME, - pr_info, - ) sys.exit(1) - else: - print("::notice ::Can run") - post_commit_status( - commit, - "pending", - ci_report_url, - description, - CI_STATUS_NAME, - pr_info, - ) + + ci_report_url = create_ci_report(pr_info, []) + print("::notice ::Can run") + post_commit_status( + commit, + "pending", + ci_report_url, + description, + CI_STATUS_NAME, + pr_info, + ) if __name__ == "__main__": diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 51f8e7d3551..49a53c9048c 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -62,6 +62,9 @@ def get_options(i: int, upgrade_check: bool) -> str: if random.random() < 0.1: client_options.append("optimize_trivial_approximate_count_query=1") + if random.random() < 0.3: + client_options.append(f"http_make_head_request={random.randint(0, 1)}") + if client_options: options.append(" --client-option " + " ".join(client_options)) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index a5498fac393..b37dcb59237 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -145,7 +145,7 @@ def main(): gh = GitHub(get_best_robot_token(), create_cache_dir=False) commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, NAME) + atexit.register(update_mergeable_check, commit, pr_info, NAME) rerun_helper = RerunHelper(commit, NAME) if rerun_helper.is_already_finished_by_status(): diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index d6767cf8b7f..f1238a00bd4 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -187,7 +187,7 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, gh, pr_info, check_name) + atexit.register(update_mergeable_check, commit, pr_info, check_name) rerun_helper = RerunHelper(commit, check_name) if rerun_helper.is_already_finished_by_status(): diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 1609d8f3c07..1175d8342b1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -32,7 +32,7 @@ from typing import Tuple, Union, Optional, Dict, Set, List import subprocess from subprocess import Popen from subprocess import PIPE -from datetime import datetime +from datetime import datetime, timedelta from time import time, sleep from errno import ESRCH @@ -279,36 +279,42 @@ 
def need_retry(args, stdout, stderr, total_time): ) -def get_processlist_with_stacktraces(args): - try: - if args.replicated_database: - return clickhouse_execute( +def get_processlist_size(args): + if args.replicated_database: + return int( + clickhouse_execute( args, """ - SELECT materialize(hostName() || '::' || tcpPort()::String) as host_port, * - -- NOTE: view() here to do JOIN on shards, instead of initiator - FROM clusterAllReplicas('test_cluster_database_replicated', view( SELECT - p.*, - arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( - x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), - s.trace), '\n') AS stacktrace - )) AS stacktraces - FROM system.processes p - JOIN system.stack_trace s USING (query_id) + count() + FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) WHERE query NOT LIKE '%system.processes%' - GROUP BY p.* - )) - ORDER BY elapsed DESC FORMAT Vertical - """, - settings={ - "allow_introspection_functions": 1, - }, - ) - else: - return clickhouse_execute( + """, + ).strip() + ) + else: + return int( + clickhouse_execute( args, """ + SELECT + count() + FROM system.processes + WHERE query NOT LIKE '%system.processes%' + """, + ).strip() + ) + + +def get_processlist_with_stacktraces(args): + if args.replicated_database: + return clickhouse_execute( + args, + """ + SELECT materialize(hostName() || '::' || tcpPort()::String) as host_port, * + -- NOTE: view() here to do JOIN on shards, instead of initiator + FROM clusterAllReplicas('test_cluster_database_replicated', view( SELECT p.*, arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( @@ -319,14 +325,35 @@ def get_processlist_with_stacktraces(args): JOIN system.stack_trace s USING (query_id) WHERE query NOT LIKE '%system.processes%' GROUP BY p.* - ORDER BY elapsed DESC FORMAT Vertical - """, - settings={ - "allow_introspection_functions": 1, - }, - ) - except Exception as e: - return "Failed to get processlist: " + str(e) + )) + ORDER BY elapsed DESC FORMAT Vertical + """, + settings={ + "allow_introspection_functions": 1, + }, + timeout=120, + ) + else: + return clickhouse_execute( + args, + """ + SELECT + p.*, + arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( + x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), + s.trace), '\n') AS stacktrace + )) AS stacktraces + FROM system.processes p + JOIN system.stack_trace s USING (query_id) + WHERE query NOT LIKE '%system.processes%' + GROUP BY p.* + ORDER BY elapsed DESC FORMAT Vertical + """, + settings={ + "allow_introspection_functions": 1, + }, + timeout=120, + ) def get_transactions_list(args): @@ -611,6 +638,13 @@ class SettingsRandomizer: "compile_sort_description": lambda: random.randint(0, 1), "merge_tree_coarse_index_granularity": lambda: random.randint(2, 32), "optimize_distinct_in_order": lambda: random.randint(0, 1), + "max_bytes_before_external_sort": threshold_generator( + 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 + ), + "max_bytes_before_external_group_by": threshold_generator( + 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 + ), + "max_bytes_before_remerge_sort": lambda: random.randint(1, 3000000000), "optimize_sorting_by_input_stream_properties": lambda: random.randint(0, 1), "http_response_buffer_size": lambda: random.randint(0, 10 * 1048576), "http_wait_end_of_query": lambda: random.random() > 0.5, @@ -997,7 +1031,24 @@ class TestCase: if proc: if proc.returncode is None: try: - proc.kill() +
pgid = os.getpgid(proc.pid) + # NOTE: this may still leave some processes that had been + # created by timeout(1), since it also creates a new process + # group. But this should not be a problem with default + # options, since the default time for each test is 10min, + # and this is much bigger than the timeout for each + # timeout(1) invocation. + # + # But as a workaround we send SIGTERM first and only then + # SIGKILL, so that timeout(1) has a chance to terminate + # its children (though not always, since signals are + # asynchronous). + os.killpg(pgid, signal.SIGTERM) + # This may not be enough, but this is at least something + # (and anyway it is OK to spend 0.1 second more in case of + # test timeout). + sleep(0.1) + os.killpg(pgid, signal.SIGKILL) except OSError as e: if e.errno != ESRCH: raise @@ -1273,7 +1324,7 @@ class TestCase: command = pattern.format(**params) - proc = Popen(command, shell=True, env=os.environ) + proc = Popen(command, shell=True, env=os.environ, start_new_session=True) while ( datetime.now() - start_time @@ -2273,7 +2324,7 @@ def reportLogStats(args): 'Attempt to read after eof', 'String size is too big ({}), maximum: {}' ) AS known_short_messages SELECT count() AS c, message_format_string, substr(any(message), 1, 120), - min(if(notEmpty(regexpExtract(message, '(.*)\\([A-Z0-9_]+\\)') as prefix), prefix, length(message)) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate + min(if(length(regexpExtract(message, '(.*)\\([A-Z0-9_]+\\)')) as prefix_len > 0, prefix_len, length(message)) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (length(message_format_string) < 16 @@ -2420,11 +2471,42 @@ def main(args): if args.hung_check: # Some queries may execute in background for some time after test was finished. This is normal. - for _ in range(1, 60): - processlist = get_processlist_with_stacktraces(args) - if not processlist: - break - sleep(1) + print("Checking the hung queries: ", end="") + hung_count = 0 + try: + deadline = datetime.now() + timedelta(seconds=90) + while datetime.now() < deadline: + hung_count = get_processlist_size(args) + if hung_count == 0: + print(" done") + break + print(". ", end="") + except Exception as e: + print( + colored( + "\nHung check failed. Failed to get processlist size: " + str(e), + args, + "red", + attrs=["bold"], + ) + ) + exit_code.value = 1 + + processlist = "" + if hung_count > 0: + try: + processlist = get_processlist_with_stacktraces(args) + except Exception as e: + print( + colored( + "\nHung check failed.
Failed to get processlist with stacktraces: " + + str(e), + args, + "red", + attrs=["bold"], + ) + ) + exit_code.value = 1 if processlist: print( diff --git a/tests/config/config.d/storage_conf_02944.xml b/tests/config/config.d/storage_conf_02944.xml new file mode 100644 index 00000000000..5f45640a923 --- /dev/null +++ b/tests/config/config.d/storage_conf_02944.xml @@ -0,0 +1,25 @@ + + + + + s3 + s3_disk/ + http://localhost:11111/test/test_02944/ + clickhouse + clickhouse + 20000 + + + cache + s3_disk + s3_cache_02944/ + 100 + 10 + 10 + 10 + 100 + 0 + + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 6046f05c922..2f9fd44c9b0 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -156,6 +156,7 @@ if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then fi ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/s3_cache_new.xml $DEST_SERVER_PATH/users.d/ fi diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index a3ad94a59ec..7c922e339fe 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -from collections import defaultdict import csv import glob import json @@ -8,13 +7,15 @@ import logging import os import random import re +import shlex import shutil import string import subprocess import time -import shlex import zlib # for crc32 +from collections import defaultdict +from integration_test_images import IMAGES MAX_RETRY = 1 NUM_WORKERS = 5 @@ -301,23 +302,6 @@ class ClickhouseIntegrationTestsRunner: def shuffle_test_groups(self): return self.shuffle_groups != 0 - @staticmethod - def get_images_names(): - return [ - "clickhouse/dotnet-client", - "clickhouse/integration-helper", - "clickhouse/integration-test", - "clickhouse/integration-tests-runner", - "clickhouse/kerberized-hadoop", - "clickhouse/kerberos-kdc", - "clickhouse/mysql-golang-client", - "clickhouse/mysql-java-client", - "clickhouse/mysql-js-client", - "clickhouse/mysql-php-client", - "clickhouse/nginx-dav", - "clickhouse/postgresql-java-client", - ] - def _pre_pull_images(self, repo_path): image_cmd = self._get_runner_image_cmd(repo_path) @@ -523,7 +507,7 @@ class ClickhouseIntegrationTestsRunner: os.path.join(repo_path, "tests/integration", "runner"), "--docker-image-version", ): - for img in self.get_images_names(): + for img in IMAGES: if img == "clickhouse/integration-tests-runner": runner_version = self.get_image_version(img) logging.info( diff --git a/tests/integration/integration_test_images.py b/tests/integration/integration_test_images.py new file mode 120000 index 00000000000..1b344702aea --- /dev/null +++ b/tests/integration/integration_test_images.py @@ -0,0 +1 @@ +../ci/integration_test_images.py \ No newline at end of file diff --git a/tests/integration/runner b/tests/integration/runner index 3760bf16b84..b1193b5b471 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -1,17 +1,17 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import subprocess -import os -import getpass -import glob import argparse +import glob import logging -import signal -import subprocess -import sys -import string +import os import random import shlex +import signal +import string +import subprocess +import sys + +from integration_test_images import get_docker_env def random_str(length=6): @@ -335,30 +335,11 @@ if 
__name__ == "__main__": if args.docker_compose_images_tags is not None: for img_tag in args.docker_compose_images_tags: [image, tag] = img_tag.split(":") - if image == "clickhouse/dotnet-client": - env_tags += "-e {}={} ".format("DOCKER_DOTNET_CLIENT_TAG", tag) - elif image == "clickhouse/integration-helper": - env_tags += "-e {}={} ".format("DOCKER_HELPER_TAG", tag) - elif image == "clickhouse/integration-test": - env_tags += "-e {}={} ".format("DOCKER_BASE_TAG", tag) - elif image == "clickhouse/kerberized-hadoop": - env_tags += "-e {}={} ".format("DOCKER_KERBERIZED_HADOOP_TAG", tag) - elif image == "clickhouse/kerberos-kdc": - env_tags += "-e {}={} ".format("DOCKER_KERBEROS_KDC_TAG", tag) - elif image == "clickhouse/mysql-golang-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-java-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-js-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-php-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) - elif image == "clickhouse/nginx-dav": - env_tags += "-e {}={} ".format("DOCKER_NGINX_DAV_TAG", tag) - elif image == "clickhouse/postgresql-java-client": - env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) + env_tag = get_docker_env(image, tag) + if env_tag: + env_tags += env_tag else: - logging.info("Unknown image %s" % (image)) + logging.info("Unknown image %s", image) # create named volume which will be used inside to store images and other docker related files, # to avoid redownloading it every time diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 478124ad41b..cd8f70b3239 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -445,3 +445,10 @@ def test_backup_with_fs_cache( # see MergeTreeData::initializeDirectoriesAndFormatVersion() if "CachedWriteBufferCacheWriteBytes" in restore_events: assert restore_events["CachedWriteBufferCacheWriteBytes"] <= 1 + + +def test_backup_to_zip(): + storage_policy = "default" + backup_name = new_backup_name() + backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.zip', 'minio', 'minio123')" + check_backup_and_restore(storage_policy, backup_destination) diff --git a/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml b/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml deleted file mode 100644 index e62425fe1bb..00000000000 --- a/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml +++ /dev/null @@ -1,40 +0,0 @@ - - - - - true - - test - node1 - 9000 - - - test - node2 - 9000 - - - - - - true - - test - node3 - 9000 - - - test - node4 - 9000 - - - test - node5 - 9000 - - - - - - diff --git a/tests/integration/test_max_http_connections_for_replication/test.py b/tests/integration/test_max_http_connections_for_replication/test.py deleted file mode 100644 index bcb779ee913..00000000000 --- a/tests/integration/test_max_http_connections_for_replication/test.py +++ /dev/null @@ -1,157 +0,0 @@ -import time -from multiprocessing.dummy import Pool - -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - - -def _fill_nodes(nodes, shard, connections_count): - for node in 
nodes: - node.query( - """ - CREATE DATABASE test; - - CREATE TABLE test_table(date Date, id UInt32, dummy UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') - PARTITION BY date - ORDER BY id - SETTINGS - replicated_max_parallel_fetches_for_host={connections}, - index_granularity=8192; - """.format( - shard=shard, replica=node.name, connections=connections_count - ) - ) - - -cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) -node2 = cluster.add_instance( - "node2", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) - - -@pytest.fixture(scope="module") -def start_small_cluster(): - try: - cluster.start() - - _fill_nodes([node1, node2], 1, 1) - - yield cluster - - finally: - cluster.shutdown() - - -def test_single_endpoint_connections_count(start_small_cluster): - node1.query("TRUNCATE TABLE test_table") - node2.query("SYSTEM SYNC REPLICA test_table") - - def task(count): - print(("Inserting ten times from {}".format(count))) - for i in range(count, count + 10): - node1.query("insert into test_table values ('2017-06-16', {}, 0)".format(i)) - - p = Pool(10) - p.map(task, range(0, 100, 10)) - - assert_eq_with_retry(node1, "select count() from test_table", "100") - assert_eq_with_retry(node2, "select count() from test_table", "100") - - assert ( - node2.query( - "SELECT value FROM system.events where event='CreatedHTTPConnections'" - ) - == "1\n" - ) - - -def test_keepalive_timeout(start_small_cluster): - node1.query("TRUNCATE TABLE test_table") - node2.query("SYSTEM SYNC REPLICA test_table") - - node1.query("insert into test_table values ('2017-06-16', 777, 0)") - assert_eq_with_retry(node2, "select count() from test_table", str(1)) - # Server keepAliveTimeout is 3 seconds, default client session timeout is 8 - # lets sleep in that interval - time.sleep(4) - - node1.query("insert into test_table values ('2017-06-16', 888, 0)") - - time.sleep(3) - - assert_eq_with_retry(node2, "select count() from test_table", str(2)) - - assert not node2.contains_in_log( - "No message received" - ), "Found 'No message received' in clickhouse-server.log" - - -node3 = cluster.add_instance( - "node3", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) -node4 = cluster.add_instance( - "node4", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) -node5 = cluster.add_instance( - "node5", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) - - -@pytest.fixture(scope="module") -def start_big_cluster(): - try: - cluster.start() - - _fill_nodes([node3, node4, node5], 2, 2) - - yield cluster - - finally: - cluster.shutdown() - - -def test_multiple_endpoint_connections_count(start_big_cluster): - def task(count): - print(("Inserting ten times from {}".format(count))) - if (count / 10) % 2 == 1: - node = node3 - else: - node = node4 - - for i in range(count, count + 10): - node.query("insert into test_table values ('2017-06-16', {}, 0)".format(i)) - - p = Pool(10) - p.map(task, range(0, 100, 10)) - - assert_eq_with_retry(node3, "select count() from test_table", "100") - assert_eq_with_retry(node4, "select count() from test_table", "100") - assert_eq_with_retry(node5, "select count() from test_table", "100") - - # Two per each host or sometimes less, if fetches are not performed in parallel. 
But not more. - assert ( - node5.query( - "SELECT value FROM system.events where event='CreatedHTTPConnections'" - ) - <= "4\n" - ) diff --git a/tests/integration/test_quorum_inserts_parallel/test.py b/tests/integration/test_quorum_inserts_parallel/test.py index 72780c16319..f30f57cc1d6 100644 --- a/tests/integration/test_quorum_inserts_parallel/test.py +++ b/tests/integration/test_quorum_inserts_parallel/test.py @@ -115,9 +115,8 @@ def test_parallel_quorum_actually_quorum(started_cluster): error = node.query_and_get_error( "INSERT INTO q VALUES(3, 'Hi')", settings=settings ) - assert "DB::Exception: Unknown status, client must retry." in error, error assert ( - "DB::Exception: Timeout while waiting for quorum. (TIMEOUT_EXCEEDED)" + "DB::Exception: Unknown quorum status. The data was inserted in the local replica but we could not verify quorum. Reason: Timeout while waiting for quorum" in error ), error diff --git a/tests/integration/test_replicated_fetches_timeouts/configs/timeouts_for_fetches.xml b/tests/integration/test_replicated_fetches_timeouts/configs/timeouts_for_fetches.xml new file mode 100644 index 00000000000..b163c6f54a1 --- /dev/null +++ b/tests/integration/test_replicated_fetches_timeouts/configs/timeouts_for_fetches.xml @@ -0,0 +1 @@ + diff --git a/tests/integration/test_replicated_fetches_timeouts/test.py b/tests/integration/test_replicated_fetches_timeouts/test.py index 7d5da55549c..55fa4b909ba 100644 --- a/tests/integration/test_replicated_fetches_timeouts/test.py +++ b/tests/integration/test_replicated_fetches_timeouts/test.py @@ -10,13 +10,25 @@ from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", with_zookeeper=True, main_configs=["configs/server.xml"] + "node1", + with_zookeeper=True, + main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"], ) node2 = cluster.add_instance( - "node2", with_zookeeper=True, main_configs=["configs/server.xml"] + "node2", + with_zookeeper=True, + stay_alive=True, + main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"], ) +config = """ + + 30 + 1 + +""" + @pytest.fixture(scope="module") def started_cluster(): @@ -49,14 +61,10 @@ def test_no_stall(started_cluster): node2.query("SYSTEM STOP FETCHES t") node1.query( - "INSERT INTO t SELECT 1, '{}' FROM numbers(500)".format( - get_random_string(104857) - ) + f"INSERT INTO t SELECT 1, '{get_random_string(104857)}' FROM numbers(500)" ) node1.query( - "INSERT INTO t SELECT 2, '{}' FROM numbers(500)".format( - get_random_string(104857) - ) + f"INSERT INTO t SELECT 2, '{get_random_string(104857)}' FROM numbers(500)" ) with PartitionManager() as pm: @@ -82,14 +90,12 @@ def test_no_stall(started_cluster): print("Connection timeouts tested!") - # Increase connection timeout and wait for receive timeouts. 
- node2.query( - """ - ALTER TABLE t - MODIFY SETTING replicated_fetches_http_connection_timeout = 30, - replicated_fetches_http_receive_timeout = 1""" + node2.replace_config( + "/etc/clickhouse-server/config.d/timeouts_for_fetches.xml", config ) + node2.restart_clickhouse() + while True: timeout_exceptions = int( node2.query( diff --git a/tests/integration/test_storage_postgresql/configs/named_collections.xml b/tests/integration/test_storage_postgresql/configs/named_collections.xml index 129225f36b9..4923c21d0a6 100644 --- a/tests/integration/test_storage_postgresql/configs/named_collections.xml +++ b/tests/integration/test_storage_postgresql/configs/named_collections.xml @@ -29,5 +29,12 @@ postgres test_replicas
+ <postgres5> + <user>postgres</user> + <password>mysecretpassword</password> + <addresses_expr>postgres1:5432</addresses_expr> + <database>postgres</database> + <table>test_table</table> + </postgres5>
diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index a1b13739b5b..d9f3a9917ab 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -82,6 +82,30 @@ def test_postgres_select_insert(started_cluster): cursor.execute(f"DROP TABLE {table_name} ") +def test_postgres_addresses_expr(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + table_name = "test_table" + table = f"""postgresql(`postgres5`)""" + cursor.execute(f"DROP TABLE IF EXISTS {table_name}") + cursor.execute(f"CREATE TABLE {table_name} (a integer, b text, c integer)") + + node1.query( + f""" + INSERT INTO TABLE FUNCTION {table} + SELECT number, concat('name_', toString(number)), 3 from numbers(10000)""" + ) + check1 = f"SELECT count() FROM {table}" + check2 = f"SELECT Sum(c) FROM {table}" + check3 = f"SELECT count(c) FROM {table} WHERE a % 2 == 0" + check4 = f"SELECT count() FROM {table} WHERE b LIKE concat('name_', toString(1))" + assert (node1.query(check1)).rstrip() == "10000" + assert (node1.query(check2)).rstrip() == "30000" + assert (node1.query(check3)).rstrip() == "5000" + assert (node1.query(check4)).rstrip() == "1" + + cursor.execute(f"DROP TABLE {table_name} ") + + def test_postgres_conversions(started_cluster): cursor = started_cluster.postgres_conn.cursor() cursor.execute(f"DROP TABLE IF EXISTS test_types") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 16183733656..2549cb0d473 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -626,7 +626,7 @@ def test_wrong_s3_syntax(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance expected_err_msg = "Code: 42" # NUMBER_OF_ARGUMENTS_DOESNT_MATCH - query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('', '', '', '', '', '')" + query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('', '', '', '', '', '', '')" assert expected_err_msg in instance.query_and_get_error(query) expected_err_msg = "Code: 36" # BAD_ARGUMENTS @@ -1395,6 +1395,7 @@ def test_schema_inference_from_globs(started_cluster): def test_signatures(started_cluster): + session_token = "session token that will not be checked by MiniIO" bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] @@ -1417,6 +1418,11 @@ def test_signatures(started_cluster): ) assert int(result) == 1 + result = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}')" + ) + assert int(result) == 1 + result = instance.query( f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'Arrow', 'x UInt64', 'auto')" ) @@ -1427,6 +1433,21 @@ def test_signatures(started_cluster): ) assert int(result) == 1 + result = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow')" + ) + assert int(result) == 1 + + lt = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64')" + ) + assert int(result) == 1 + + lt = instance.query( + f"select * from 
s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64', 'auto')" + ) + assert int(result) == 1 + def test_select_columns(started_cluster): bucket = started_cluster.minio_bucket diff --git a/tests/performance/README.md b/tests/performance/README.md index f554e96203b..289ecaba034 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -18,5 +18,5 @@ TODO @akuzm ``` pip3 install clickhouse_driver scipy -../../docker/test/performance-comparison/perf.py --runs 1 insert_parallel.xml +../../tests/performance/scripts/perf.py --runs 1 insert_parallel.xml ``` diff --git a/tests/performance/if.xml b/tests/performance/if.xml new file mode 100644 index 00000000000..f4d0e8f9773 --- /dev/null +++ b/tests/performance/if.xml @@ -0,0 +1,12 @@ + + + 42949673, zero + 1, zero + 2)) ]]> + + + + + + + + + diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference index cd9f0142d45..d8c0db3b996 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference @@ -1,15 +1,15 @@ -runtime messages 0.001 -runtime exceptions 0.05 -unknown runtime exceptions 0.01 -messages shorter than 10 1 -messages shorter than 16 3 -exceptions shorter than 30 3 [] -noisy messages 0.3 -noisy Trace messages 0.16 -noisy Debug messages 0.09 -noisy Info messages 0.05 -noisy Warning messages 0.01 -noisy Error messages 0.02 +runtime messages 0.001 [] +runtime exceptions 0.05 [] +unknown runtime exceptions 0.01 [] +messages shorter than 10 1 [] +messages shorter than 16 1 [] +exceptions shorter than 30 1 [] +noisy messages 0.3 +noisy Trace messages 0.16 +noisy Debug messages 0.09 +noisy Info messages 0.05 +noisy Warning messages 0.01 +noisy Error messages 0.03 no Fatal messages 0 number of too noisy messages 3 number of noisy messages 10 diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 062806baae9..3a83126ea11 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -9,57 +9,174 @@ create view logs as select * from system.text_log where now() - toIntervalMinute -- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 -select 'runtime messages', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs - where message not like '% Received from %clickhouse-staging.com:9440%'; +WITH 0.001 AS threshold +SELECT + 'runtime messages', + greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold), + v <= threshold ? [] : + (SELECT groupArray((message, c)) FROM ( + SELECT message, count() as c FROM logs + WHERE + length(message_format_string) = 0 + AND message not like '% Received from %clickhouse-staging.com:9440%' + AND source_file not like '%/AWSLogger.cpp%' + GROUP BY message ORDER BY c LIMIT 10 + )) +FROM logs +WHERE + message NOT LIKE '% Received from %clickhouse-staging.com:9440%' + AND source_file not like '%/AWSLogger.cpp%'; -- Check the same for exceptions. 
The value was 0.03 -select 'runtime exceptions', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs - where (message like '%DB::Exception%' or message like '%Coordination::Exception%') - and message not like '% Received from %clickhouse-staging.com:9440%'; +WITH 0.05 AS threshold +SELECT + 'runtime exceptions', + greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold), + v <= threshold ? [] : + (SELECT groupArray((message, c)) FROM ( + SELECT message, count() as c FROM logs + WHERE + length(message_format_string) = 0 + AND (message like '%DB::Exception%' or message like '%Coordination::Exception%') + AND message not like '% Received from %clickhouse-staging.com:9440%' + GROUP BY message ORDER BY c LIMIT 10 + )) +FROM logs +WHERE + message NOT LIKE '% Received from %clickhouse-staging.com:9440%' + AND (message like '%DB::Exception%' or message like '%Coordination::Exception%'); + +WITH 0.01 AS threshold +SELECT + 'unknown runtime exceptions', + greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold), + v <= threshold ? [] : + (SELECT groupArray((message, c)) FROM ( + SELECT message, count() as c FROM logs + WHERE + length(message_format_string) = 0 + AND (message like '%DB::Exception%' or message like '%Coordination::Exception%') + AND message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%' + GROUP BY message ORDER BY c LIMIT 10 + )) +FROM logs +WHERE + (message like '%DB::Exception%' or message like '%Coordination::Exception%') + AND message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%'; -select 'unknown runtime exceptions', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.01) from logs where - (message like '%DB::Exception%' or message like '%Coordination::Exception%') - and message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%'; -- FIXME some of the following messages are not informative and it has to be fixed -create temporary table known_short_messages (s String) as select * from (select -['', '{} ({})', '({}) Keys: {}', '({}) {}', 'Aggregating', 'Became leader', 'Cleaning queue', -'Creating set.', 'Cyclic aliases', 'Detaching {}', 'Executing {}', 'Fire events: {}', -'Found part {}', 'Loaded queue', 'No sharding key', 'No tables', 'Query: {}', 'Removed', -'Removed part {}', 'Removing parts.', 'Request URI: {}', 'Sending part {}', -'Sent handshake', 'Starting {}', 'Will mimic {}', 'Writing to {}', 'dropIfEmpty', -'loadAll {}', '{} ({}:{})', '{} -> {}', '{} {}', '{}: {}', '{}%', 'Read object: {}', -'New segment: {}', 'Convert overflow', 'Division by zero', 'Files set to {}', -'Bytes set to {}', 'Numeric overflow', 'Invalid mode: {}', -'Write file: {}', 'Unable to parse JSONPath', 'Host is empty in S3 URI.', 'Expected end of line', -'inflate failed: {}{}', 'Center is not valid', 'Column ''{}'' is ambiguous', 'Cannot parse object', 'Invalid date: {}', -'There is no cache by name: {}', 'No part {} in table', '`{}` should be a String', 'There are duplicate id {}', -'Invalid replica name: {}', 'Unexpected value {} in enum', 'Unknown BSON type: {}', 'Point is not valid', -'Invalid qualified name: {}', 'INTO OUTFILE is not allowed', 'Arguments must not be NaN', 'Cell is not valid', -'brotli decode error{}', 'Invalid H3 index: {}', 'Too large node state size', 'No additional keys found.', -'Attempt to read after EOF.', 'Replication was stopped', '{} building file infos', 'Cannot 
parse uuid {}', -'Query was cancelled', 'Cancelled merging parts', 'Cancelled mutating parts', 'Log pulling is cancelled', -'Transaction was cancelled', 'Could not find table: {}', 'Table {} does not exist', -'Database {} does not exist', 'Dictionary ({}) not found', 'Unknown table function {}', -'Unknown format {}', 'Unknown explain kind ''{}''', 'Unknown setting {}', 'Unknown input format {}', -'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', -'Attempt to read after eof', 'String size is too big ({}), maximum: {}', -'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}', -'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64', -'Unknown statistic column: {}', -'Bad SSH public key provided', 'Database {} does not exist', 'Substitution {} is not set', 'Invalid cache key hex: {}' -] as arr) array join arr; +create temporary table known_short_messages (s String) as select * from (select [ + '', + '({}) Keys: {}', + '({}) {}', + 'Aggregating', + 'Attempt to read after EOF.', + 'Attempt to read after eof', + 'Bad SSH public key provided', + 'Became leader', + 'Bytes set to {}', + 'Cancelled merging parts', + 'Cancelled mutating parts', + 'Cannot parse date here: {}', + 'Cannot parse object', + 'Cannot parse uuid {}', + 'Cleaning queue', + 'Column \'{}\' is ambiguous', + 'Convert overflow', + 'Could not find table: {}', + 'Creating {}: {}', + 'Cyclic aliases', + 'Database {} does not exist', + 'Detaching {}', + 'Dictionary ({}) not found', + 'Division by zero', + 'Executing {}', + 'Expected end of line', + 'Expected function, got: {}', + 'Files set to {}', + 'Fire events: {}', + 'Found part {}', + 'Host is empty in S3 URI.', + 'INTO OUTFILE is not allowed', + 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64', + 'Illegal UTF-8 sequence, while processing \'{}\'', + 'Invalid cache key hex: {}', + 'Invalid date: {}', + 'Invalid mode: {}', + 'Invalid qualified name: {}', + 'Invalid replica name: {}', + 'Loaded queue', + 'Log pulling is cancelled', + 'New segment: {}', + 'No additional keys found.', + 'No part {} in table', + 'No sharding key', + 'No tables', + 'Numeric overflow', + 'Path to archive is empty', + 'Processed: {}%', + 'Query was cancelled', + 'Query: {}', + 'Read object: {}', + 'Removed part {}', + 'Removing parts.', + 'Replication was stopped', + 'Request URI: {}', + 'Sending part {}', + 'Sent handshake', + 'Starting {}', + 'String size is too big ({}), maximum: {}', + 'Substitution {} is not set', + 'Table {} does not exist', + 'Table {}.{} doesn\'t exist', + 'There are duplicate id {}', + 'There is no cache by name: {}', + 'Too large node state size', + 'Transaction was cancelled', + 'Unable to parse JSONPath', + 'Unexpected value {} in enum', + 'Unknown BSON type: {}', + 'Unknown explain kind \'{}\'', + 'Unknown format {}', + 'Unknown identifier: \'{}\'', + 'Unknown input format {}', + 'Unknown setting {}', + 'Unknown statistic column: {}', + 'Unknown table function {}', + 'User has been dropped', + 'User name is empty', + 'Will mimic {}', + 'Write file: {}', + 'Writing to {}', + '`{}` should be a String', + 'brotli decode error{}', + 'dropIfEmpty', + 'inflate failed: {}{}', + 'loadAll {}', + '{} ({})', + '{} ({}:{})', + '{} -> {}', + '{} {}', + '{}%', + '{}: {}' + ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. 
+WITH 1 AS max_messages select 'messages shorter than 10', - greatest(uniqExact(message_format_string), 1) + (uniqExact(message_format_string) as c) <= max_messages, + c <= max_messages ? [] : groupUniqArray(message_format_string) from logs where length(message_format_string) < 10 and message_format_string not in known_short_messages; -- Same as above. Feel free to update the threshold or remove this query if really necessary +WITH 3 AS max_messages select 'messages shorter than 16', - greatest(uniqExact(message_format_string), 3) - from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; + (uniqExact(message_format_string) as c) <= max_messages, + c <= max_messages ? [] : groupUniqArray(message_format_string) + from logs + where length(message_format_string) < 16 and message_format_string not in known_short_messages; -- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.: -- "Expected end of line" -> "Code: 117. DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)" @@ -68,40 +185,53 @@ select 'messages shorter than 16', -- This table currently doesn't have enough information to do this reliably, so we just regex search for " (ERROR_NAME_IN_CAPS)" and hope that's good enough. -- For the "Code: 123. DB::Exception: " part, we just subtract 26 instead of searching for it. Because sometimes it's not at the start, e.g.: -- "Unexpected error, will try to restart main thread: Code: 341. DB::Exception: Unexpected error: Code: 57. DB::Exception:[...]" +WITH 3 AS max_messages select 'exceptions shorter than 30', - greatest(uniqExact(message_format_string), 3) AS c, - c = 3 ? [] : groupUniqArray(message_format_string) + (uniqExact(message_format_string) as c) <= max_messages, + c <= max_messages ? [] : groupUniqArray(message_format_string) from logs where message ilike '%DB::Exception%' and if(length(extract(message, '(.*)\\([A-Z0-9_]+\\)')) as pref > 0, pref, length(message)) < 30 + 26 and message_format_string not in known_short_messages; - -- Avoid too noisy messages: top 1 message frequency must be less than 30%. We should reduce the threshold -select 'noisy messages', - greatest((select count() from logs group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.30); +WITH 0.30 as threshold +select + 'noisy messages', + greatest(coalesce(((select message_format_string, count() from logs group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, + r <= threshold ? '' : top_message.1; -- Same as above, but excluding Test level (actually finds top 1 Trace message) -with ('Access granted: {}{}', '{} -> {}') as frequent_in_tests -select 'noisy Trace messages', - greatest((select count() from logs where level!='Test' and message_format_string not in frequent_in_tests - group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.16); +with 0.16 as threshold +select + 'noisy Trace messages', + greatest(coalesce(((select message_format_string, count() from logs where level = 'Trace' and message_format_string not in ('Access granted: {}{}', '{} -> {}') + group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, + r <= threshold ? 
'' : top_message.1; -- Same as above for Debug +WITH 0.09 as threshold select 'noisy Debug messages', - greatest((select count() from logs where level <= 'Debug' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.09); + greatest(coalesce(((select message_format_string, count() from logs where level = 'Debug' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, + r <= threshold ? '' : top_message.1; -- Same as above for Info +WITH 0.05 as threshold select 'noisy Info messages', - greatest((select count() from logs where level <= 'Information' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.05); + greatest(coalesce(((select message_format_string, count() from logs where level = 'Information' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, + r <= threshold ? '' : top_message.1; -- Same as above for Warning -with ('Not enabled four letter command {}') as frequent_in_tests -select 'noisy Warning messages', - greatest(coalesce((select count() from logs where level = 'Warning' and message_format_string not in frequent_in_tests - group by message_format_string order by count() desc limit 1), 0) / (select count() from logs), 0.01); +with 0.01 as threshold +select + 'noisy Warning messages', + greatest(coalesce(((select message_format_string, count() from logs where level = 'Warning' and message_format_string not in ('Not enabled four letter command {}') + group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, + r <= threshold ? '' : top_message.1; -- Same as above for Error +WITH 0.03 as threshold select 'noisy Error messages', - greatest(coalesce((select count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1), 0) / (select count() from logs), 0.02); + greatest(coalesce(((select message_format_string, count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, + r <= threshold ? '' : top_message.1; select 'no Fatal messages', count() from logs where level = 'Fatal'; diff --git a/tests/queries/0_stateless/00109_shard_totals_after_having.sql b/tests/queries/0_stateless/00109_shard_totals_after_having.sql index b17accc0dae..dce265e0552 100644 --- a/tests/queries/0_stateless/00109_shard_totals_after_having.sql +++ b/tests/queries/0_stateless/00109_shard_totals_after_having.sql @@ -4,6 +4,9 @@ SET max_rows_to_group_by = 100000; SET max_block_size = 100001; SET group_by_overflow_mode = 'any'; +-- Settings 'max_rows_to_group_by' and 'max_bytes_before_external_group_by' are mutually exclusive. 
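-- The same pair of settings can also be scoped to a single statement via a SETTINGS clause instead
-- of a session-level SET; a minimal sketch over ad-hoc data (illustrative only, values arbitrary):
SELECT number % 7 AS k, count()
FROM numbers(100000)
GROUP BY k
ORDER BY k
SETTINGS max_rows_to_group_by = 100000, group_by_overflow_mode = 'any', max_bytes_before_external_group_by = 0;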
+SET max_bytes_before_external_group_by = 0; + DROP TABLE IF EXISTS numbers500k; CREATE TABLE numbers500k (number UInt32) ENGINE = TinyLog; diff --git a/tests/queries/0_stateless/00119_storage_join.sql b/tests/queries/0_stateless/00119_storage_join.sql index 2569a64d2c3..cd255cdfe24 100644 --- a/tests/queries/0_stateless/00119_storage_join.sql +++ b/tests/queries/0_stateless/00119_storage_join.sql @@ -12,7 +12,7 @@ SELECT x, s, k FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LE SELECT 1, x, 2, s, 3, k, 4 FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k; SELECT t1.k, t1.s, t2.x -FROM ( SELECT number AS k, 'a' AS s FROM numbers(2) GROUP BY number WITH TOTALS ) AS t1 +FROM ( SELECT number AS k, 'a' AS s FROM numbers(2) GROUP BY number WITH TOTALS ORDER BY number) AS t1 ANY LEFT JOIN t2 AS t2 USING(k); DROP TABLE t2; diff --git a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh index 389a2cd9684..a42fd58190a 100755 --- a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh +++ b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh @@ -16,7 +16,7 @@ if [ -n "$DBMS_TESTS_UNDER_VALGRIND" ]; then fi for i in $(seq 1000000 $((20000 * $STEP_MULTIPLIER)) 10000000 && seq 10100000 $((100000 * $STEP_MULTIPLIER)) 50000000); do - $CLICKHOUSE_CLIENT --max_memory_usage="$i" --query=" + $CLICKHOUSE_CLIENT --max_memory_usage="$i" --max_bytes_before_external_group_by 0 --query=" SELECT intDiv(number, 5) AS k, max(toString(number)) FROM remote('127.0.0.{2,3}', ${CLICKHOUSE_DATABASE}.numbers_100k) GROUP BY k ORDER BY k LIMIT 1; " 2> /dev/null; CODE=$?; diff --git a/tests/queries/0_stateless/00155_long_merges.sh b/tests/queries/0_stateless/00155_long_merges.sh index 9ed0f2c6de1..8ecca0aeb42 100755 --- a/tests/queries/0_stateless/00155_long_merges.sh +++ b/tests/queries/0_stateless/00155_long_merges.sh @@ -34,32 +34,40 @@ function test { SETTINGS="--min_insert_block_size_rows=0 --min_insert_block_size_bytes=0 --max_block_size=65505" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES summing_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO summing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $1" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO summing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $2" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES collapsing_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO collapsing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $1" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO collapsing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $2" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES aggregating_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO aggregating_00155 (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $1) GROUP BY number" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO aggregating_00155 (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $2) GROUP BY number" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES replacing_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO replacing_00155 (x, v) SELECT number AS x, toUInt64(number % 3 == 0) FROM system.numbers LIMIT $1" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO 
replacing_00155 (x, v) SELECT number AS x, toUInt64(number % 3 == 1) FROM system.numbers LIMIT $2" $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sum(s) = $SUM FROM summing_00155" + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES summing_00155" $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE summing_00155" $CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sum(s) = $SUM FROM summing_00155" echo $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sum(s) = $SUM FROM collapsing_00155" - $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE collapsing_00155" --server_logs_file='/dev/null'; + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES collapsing_00155" + $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE collapsing_00155 FINAL" --server_logs_file='/dev/null'; $CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sum(s) = $MAX FROM collapsing_00155" echo $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sumMerge(s) = $SUM FROM aggregating_00155" - $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE aggregating_00155" + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES aggregating_00155" + $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE aggregating_00155 FINAL" $CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sumMerge(s) = $SUM FROM aggregating_00155" echo $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sum(s) = $SUM FROM replacing_00155" - $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE replacing_00155" + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES replacing_00155" + $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE replacing_00155 FINAL" $CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sum(s) = $MAX FROM replacing_00155" $CLICKHOUSE_CLIENT --query="SELECT count() = sum(v) FROM replacing_00155 where x % 3 == 0 and x < $1" $CLICKHOUSE_CLIENT --query="SELECT count() = sum(v) FROM replacing_00155 where x % 3 == 1 and x < $2" diff --git a/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql b/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql index b73a04e19b9..0a5a84bbb46 100644 --- a/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql +++ b/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql @@ -1 +1,4 @@ +-- Disable external aggregation because the state is reset for each new block of data in 'runningAccumulate' function. 
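-- runningAccumulate() keeps its accumulator only within one data block, so the reported running
-- totals depend on how the input is split into blocks, and external aggregation changes that split.
-- A minimal sketch of the block dependence (ad-hoc data; max_block_size is chosen only to force a
-- split, and the exact point where the accumulator restarts is implementation-dependent):
SELECT k, runningAccumulate(s) AS acc
FROM (SELECT number AS k, sumState(number) AS s FROM numbers(10) GROUP BY k ORDER BY k)
SETTINGS max_block_size = 3;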
+SET max_bytes_before_external_group_by = 0; + SELECT k, finalizeAggregation(sum_state), runningAccumulate(sum_state) FROM (SELECT intDiv(number, 50000) AS k, sumState(number) AS sum_state FROM (SELECT number FROM system.numbers LIMIT 1000000) GROUP BY k ORDER BY k); diff --git a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference index b2b0b43e490..72828aae5a9 100644 --- a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference +++ b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference @@ -36,9 +36,9 @@ GROUP BY ORDER BY 1 GROUP BY w/ ALIAS 0 -1 0 1 +1 ORDER BY w/ ALIAS 0 func(aggregate function) GROUP BY diff --git a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql index 422f4a010f1..1bd6cbe8948 100644 --- a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql +++ b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql @@ -34,7 +34,7 @@ SELECT uniq(number) u FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184 -- cover possible tricky issues SELECT 'GROUP BY w/ ALIAS'; -SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) GROUP BY number AS n SETTINGS distributed_group_by_no_merge=2; +SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) GROUP BY number AS n ORDER BY n SETTINGS distributed_group_by_no_merge=2; SELECT 'ORDER BY w/ ALIAS'; SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) ORDER BY number AS n LIMIT 1 SETTINGS distributed_group_by_no_merge=2; diff --git a/tests/queries/0_stateless/00273_quantiles.sql b/tests/queries/0_stateless/00273_quantiles.sql index 9fef1f63057..f5b739b8be1 100644 --- a/tests/queries/0_stateless/00273_quantiles.sql +++ b/tests/queries/0_stateless/00273_quantiles.sql @@ -8,4 +8,7 @@ SELECT quantilesExact(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0 SELECT quantilesTDigest(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesDeterministic(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x, x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); +-- The result slightly differs but it's ok since `quantilesDeterministic` is an approximate function. 
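-- quantilesDeterministic(levels...)(x, determinator) samples its input using the determinator
-- column (the test uses intHash64(number)), so reruns are reproducible while the result remains
-- approximate; comparing with the exact variant shows the small drift. Illustrative sketch only:
SELECT
    quantilesDeterministic(0.1, 0.5, 0.9)(number, intHash64(number)) AS approx,
    quantilesExact(0.1, 0.5, 0.9)(number) AS exact
FROM numbers(100000);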
+SET max_bytes_before_external_group_by = 0; + SELECT round(1000000 / (number + 1)) AS k, count() AS c, arrayMap(x -> round(x, 6), quantilesDeterministic(0.1, 0.5, 0.9)(number, intHash64(number))) AS q1, quantilesExact(0.1, 0.5, 0.9)(number) AS q2 FROM (SELECT number FROM system.numbers LIMIT 1000000) GROUP BY k ORDER BY k; diff --git a/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql b/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql index 67f5cc54afd..a3abbb9fd58 100644 --- a/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql +++ b/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql @@ -7,4 +7,8 @@ DROP TABLE IF EXISTS arena; SELECT length(arrayReduce('groupUniqArray', [[1, 2], [1], emptyArrayUInt8(), [1], [1, 2]])); SELECT min(x), max(x) FROM (SELECT length(arrayReduce('groupUniqArray', [hex(number), hex(number+1), hex(number)])) AS x FROM system.numbers LIMIT 100000); + +-- Disable external aggregation because the state is reset for each new block of data in 'runningAccumulate' function. +SET max_bytes_before_external_group_by = 0; + SELECT sum(length(runningAccumulate(x))) FROM (SELECT groupUniqArrayState(toString(number % 10)) AS x, number FROM (SELECT * FROM system.numbers LIMIT 11) GROUP BY number ORDER BY number); diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index 11396dd34eb..1bb4dbd34de 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -settings="--log_queries=1 --log_query_threads=1 --log_profile_events=1 --log_query_settings=1 --allow_deprecated_syntax_for_merge_tree=1" +settings="--log_queries=1 --log_query_threads=1 --log_profile_events=1 --log_query_settings=1 --allow_deprecated_syntax_for_merge_tree=1 --max_bytes_before_external_group_by 0 --max_bytes_before_external_sort 0" # Test insert logging on each block and checkPacket() method diff --git a/tests/queries/0_stateless/00732_quorum_insert_have_data_before_quorum_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_have_data_before_quorum_zookeeper_long.sql index 23b368549f8..bff8c7e73ee 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_have_data_before_quorum_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_have_data_before_quorum_zookeeper_long.sql @@ -20,7 +20,6 @@ SET select_sequential_consistency=1; SELECT x FROM quorum1 ORDER BY x; SELECT x FROM quorum2 ORDER BY x; -SET insert_keeper_fault_injection_probability=0; SET insert_quorum=2, insert_quorum_parallel=0; INSERT INTO quorum1 VALUES (4, '1990-11-15'); diff --git a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql index 74399c9f27c..a1859220c6c 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql @@ -11,7 +11,6 @@ CREATE TABLE quorum2(x UInt32, y Date) ENGINE ReplicatedMergeTree('/clickhouse/t SET insert_quorum=2, insert_quorum_parallel=0; SET select_sequential_consistency=1; -SET insert_keeper_fault_injection_probability=0; INSERT INTO quorum1 VALUES (1, '2018-11-15'); INSERT INTO quorum1 VALUES (2, '2018-11-15'); diff --git a/tests/queries/0_stateless/00732_quorum_insert_lost_part_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_lost_part_zookeeper_long.sql index a61672249a8..61394447c3d 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_lost_part_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_lost_part_zookeeper_long.sql @@ -11,7 +11,6 @@ CREATE TABLE quorum2(x UInt32, y Date) ENGINE ReplicatedMergeTree('/clickhouse/t SET insert_quorum=2, insert_quorum_parallel=0; SET select_sequential_consistency=1; -SET insert_keeper_fault_injection_probability=0; SET insert_quorum_timeout=0; diff --git a/tests/queries/0_stateless/00732_quorum_insert_select_with_old_data_and_without_quorum_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_select_with_old_data_and_without_quorum_zookeeper_long.sql index e821d7587ee..e3e5aa7949f 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_select_with_old_data_and_without_quorum_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_select_with_old_data_and_without_quorum_zookeeper_long.sql @@ -17,7 +17,6 @@ SYSTEM SYNC REPLICA quorum2; SET select_sequential_consistency=1; SET insert_quorum=2, insert_quorum_parallel=0; -SET insert_keeper_fault_injection_probability=0; SET insert_quorum_timeout=0; diff --git a/tests/queries/0_stateless/00732_quorum_insert_simple_test_1_parts_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_simple_test_1_parts_zookeeper_long.sql index 22fb40f9f85..4eb263c75c2 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_simple_test_1_parts_zookeeper_long.sql +++ 
b/tests/queries/0_stateless/00732_quorum_insert_simple_test_1_parts_zookeeper_long.sql @@ -11,7 +11,6 @@ CREATE TABLE quorum2(x UInt32, y Date) ENGINE ReplicatedMergeTree('/clickhouse/t SET insert_quorum=2, insert_quorum_parallel=0; SET select_sequential_consistency=1; -SET insert_keeper_fault_injection_probability=0; INSERT INTO quorum1 VALUES (1, '2018-11-15'); INSERT INTO quorum1 VALUES (2, '2018-11-15'); diff --git a/tests/queries/0_stateless/00732_quorum_insert_simple_test_2_parts_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_simple_test_2_parts_zookeeper_long.sql index a97b7438da0..7fb23936819 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_simple_test_2_parts_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_simple_test_2_parts_zookeeper_long.sql @@ -11,7 +11,6 @@ CREATE TABLE quorum2(x UInt32, y Date) ENGINE ReplicatedMergeTree('/clickhouse/t SET insert_quorum=2, insert_quorum_parallel=0; SET select_sequential_consistency=1; -SET insert_keeper_fault_injection_probability=0; INSERT INTO quorum1 VALUES (1, '2018-11-15'); INSERT INTO quorum1 VALUES (2, '2018-11-15'); diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference index 3de05d66188..dd5860ae491 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference @@ -6,7 +6,7 @@ │ name2 │ 1 │ 0 │ 0 │ 0 │ │ name3 │ 0 │ 0 │ 0 │ 0 │ └───────┴─────────────────────┴───────────────────┴───────────────────┴────────────────────┘ -231 1 +3 231 1 ┌─name────────────────┬─partition_key─┬─sorting_key───┬─primary_key─┬─sampling_key─┐ │ check_system_tables │ date │ date, version │ date │ │ └─────────────────────┴───────────────┴───────────────┴─────────────┴──────────────┘ @@ -51,3 +51,6 @@ Check total_bytes/total_rows for Set Check total_bytes/total_rows for Join 1 50 1 100 +Check total_uncompressed_bytes/total_bytes/total_rows for Materialized views +0 0 0 +1 1 1 diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index ae9db656f00..51818228913 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -23,7 +23,7 @@ FROM system.columns WHERE table = 'check_system_tables' AND database = currentDa FORMAT PrettyCompactNoEscapes; INSERT INTO check_system_tables VALUES (1, 1, 1); -SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); +SELECT total_bytes_uncompressed, total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); DROP TABLE IF EXISTS check_system_tables; @@ -138,3 +138,23 @@ SELECT total_bytes BETWEEN 5000 AND 15000, total_rows FROM system.tables WHERE n INSERT INTO check_system_tables SELECT number+50 FROM numbers(50); SELECT total_bytes BETWEEN 5000 AND 15000, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); DROP TABLE check_system_tables; + +-- Build MergeTree table for Materialized view +CREATE TABLE check_system_tables + ( + name1 UInt8, + name2 UInt8, + name3 UInt8 + ) ENGINE = MergeTree() + ORDER BY name1 + PARTITION BY name2 + SAMPLE BY name1 + SETTINGS 
min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; + +SELECT 'Check total_uncompressed_bytes/total_bytes/total_rows for Materialized views'; +CREATE MATERIALIZED VIEW check_system_tables_mv ENGINE = MergeTree() ORDER BY name2 AS SELECT name1, name2, name3 FROM check_system_tables; +SELECT total_bytes_uncompressed, total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables_mv' AND database = currentDatabase(); +INSERT INTO check_system_tables VALUES (1, 1, 1); +SELECT total_bytes_uncompressed > 0, total_bytes > 0, total_rows FROM system.tables WHERE name = 'check_system_tables_mv' AND database = currentDatabase(); +DROP TABLE check_system_tables_mv; +DROP TABLE check_system_tables; diff --git a/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/tests/queries/0_stateless/00808_not_optimize_predicate.sql index ba8f5eb5753..d0dda14e026 100644 --- a/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -48,7 +48,8 @@ SELECT intDiv(number, 25) AS n, avgState(number) AS s FROM numbers(2500) -GROUP BY n; +GROUP BY n +ORDER BY n; SET force_primary_key = 1, enable_optimize_predicate_expression = 1; @@ -60,7 +61,8 @@ FROM finalizeAggregation(s) FROM test_00808_push_down_with_finalizeAggregation ) -WHERE (n >= 2) AND (n <= 5); +WHERE (n >= 2) AND (n <= 5) +ORDER BY n; EXPLAIN SYNTAX SELECT * FROM diff --git a/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql b/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql index 3864293751f..8eb9d83b730 100644 --- a/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql +++ b/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql @@ -2,6 +2,7 @@ -- Tag no-msan: memory limits don't work correctly under msan because it replaces malloc/free SET max_memory_usage = 1000000000; +SET max_bytes_before_external_group_by = 0; SELECT sum(ignore(*)) FROM ( SELECT number, argMax(number, (number, toFixedString(toString(number), 1024))) diff --git a/tests/queries/0_stateless/00947_ml_test.sql b/tests/queries/0_stateless/00947_ml_test.sql index 94e4f3b4626..72000103a44 100644 --- a/tests/queries/0_stateless/00947_ml_test.sql +++ b/tests/queries/0_stateless/00947_ml_test.sql @@ -40,10 +40,10 @@ INSERT INTO grouptest VALUES (1, 1.732, 3.653, 11.422), (1, 2.150, 2.103, 7.609), (1, 0.061, 3.310, 7.052), (1, 1.030, 3.671, 10.075), (1, 1.879, 0.578, 2.492), (1, 0.922, 2.552, 6.499), (1, 1.145, -0.095, -0.993), (1, 1.920, 0.373, 1.959), (1, 0.458, 0.094, -1.801), (1, -0.118, 3.273, 6.582), (1, 2.667, 1.472, 6.752), (1, -0.387, -0.529, -5.360), (1, 2.219, 1.790, 6.810), (1, -0.754, 2.139, 1.908), (1, -0.446, -0.668, -5.896), (1, 1.729, 0.914, 3.199), (1, 2.908, -0.420, 1.556), (1, 1.645, 3.581, 11.034), (1, 0.358, -0.950, -5.136), (1, -0.467, 2.339, 3.084), (1, 3.629, 2.959, 13.135), (1, 2.393, 0.926, 4.563), (1, -0.945, 0.281, -4.047), (1, 3.688, -0.570, 2.667), (1, 3.016, 1.775, 8.356), (1, 2.571, 0.139, 2.559), (1, 2.999, 0.956, 5.866), (1, 1.754, -0.809, -1.920), (1, 3.943, 0.382, 6.030), (1, -0.970, 2.315, 2.004), (1, 1.503, 0.790, 2.376), (1, -0.775, 2.563, 3.139), (1, 1.211, 0.113, -0.240), (1, 3.058, 0.977, 6.048), (1, 2.729, 1.634, 7.360), (1, 0.307, 2.759, 5.893), (1, 3.272, 0.181, 4.089), (1, 1.192, 1.963, 5.273), (1, 0.931, 1.447, 3.203), (1, 3.835, 3.447, 15.011), (1, 0.709, 0.008, -1.559), (1, 3.155, -0.676, 1.283), (1, 2.342, 1.047, 4.824), (1, 2.059, 
1.262, 4.903), (1, 2.797, 0.855, 5.159), (1, 0.387, 0.645, -0.292), (1, 1.418, 0.408, 1.060), (1, 2.719, -0.826, -0.039), (1, 2.735, 3.736, 13.678), (1, 0.205, 0.777, -0.260), (1, 3.117, 2.063, 9.424), (1, 0.601, 0.178, -1.263), (1, 0.064, 0.157, -2.401), (1, 3.104, -0.455, 1.842), (1, -0.253, 0.672, -1.490), (1, 2.592, -0.408, 0.961), (1, -0.909, 1.314, -0.878), (1, 0.625, 2.594, 6.031), (1, 2.749, -0.210, 1.869), (1, -0.469, 1.532, 0.657), (1, 1.954, 1.827, 6.388), (1, -0.528, 1.136, -0.647), (1, 0.802, -0.583, -3.146), (1, -0.176, 1.584, 1.400), (1, -0.705, -0.785, -6.766), (1, 1.660, 2.365, 7.416), (1, 2.278, 3.977, 13.485), (1, 2.846, 3.845, 14.229), (1, 3.588, -0.401, 2.974), (1, 3.525, 3.831, 15.542), (1, 0.191, 3.312, 7.318), (1, 2.615, -0.287, 1.370), (1, 2.701, -0.446, 1.064), (1, 2.065, -0.556, -0.538), (1, 2.572, 3.618, 12.997), (1, 3.743, -0.708, 2.362), (1, 3.734, 2.319, 11.425), (1, 3.768, 2.777, 12.866), (1, 3.203, 0.958, 6.280), (1, 1.512, 2.635, 7.927), (1, 2.194, 2.323, 8.356), (1, -0.726, 2.729, 3.735), (1, 0.020, 1.704, 2.152), (1, 2.173, 2.856, 9.912), (1, 3.124, 1.705, 8.364), (1, -0.834, 2.142, 1.759), (1, -0.702, 3.024, 4.666), (1, 1.393, 0.583, 1.535), (1, 2.136, 3.770, 12.581), (1, -0.445, 0.991, -0.917), (1, 0.244, -0.835, -5.016), (1, 2.789, 0.691, 4.652), (1, 0.246, 2.661, 5.475), (1, 3.793, 2.671, 12.601), (1, 1.645, -0.973, -2.627), (1, 2.405, 1.842, 7.336), (1, 3.221, 3.109, 12.769), (1, -0.638, 3.220, 5.385), (1, 1.836, 3.025, 9.748), (1, -0.660, 1.818, 1.133), (1, 0.901, 0.981, 1.744), (1, -0.236, 3.087, 5.789), (1, 1.744, 3.864, 12.078), (1, -0.166, 3.186, 6.226), (1, 3.536, -0.090, 3.803), (1, 3.284, 2.026, 9.648), (1, 1.327, 2.822, 8.119), (1, -0.709, 0.105, -4.104), (1, 0.509, -0.989, -4.949), (1, 0.180, -0.934, -5.440), (1, 3.522, 1.374, 8.168), (1, 1.497, -0.764, -2.297), (1, 1.696, 2.364, 7.482), (1, -0.202, -0.032, -3.500), (1, 3.109, -0.138, 2.804), (1, -0.238, 2.992, 5.501), (1, 1.639, 1.634, 5.181), (1, 1.919, 0.341, 1.859), (1, -0.563, 1.750, 1.124), (1, 0.886, 3.589, 9.539), (1, 3.619, 3.020, 13.299), (1, 1.703, -0.493, -1.073), (1, 2.364, 3.764, 13.022), (1, 1.820, 1.854, 6.201), (1, 1.437, -0.765, -2.421), (1, 1.396, 0.959, 2.668), (1, 2.608, 2.032, 8.312), (1, 0.333, -0.040, -2.455), (1, 3.441, 0.824, 6.355), (1, 1.303, 2.767, 7.908), (1, 1.359, 2.404, 6.932), (1, 0.674, 0.241, -0.930), (1, 2.708, -0.077, 2.183), (1, 3.821, 3.215, 14.287), (1, 3.316, 1.591, 8.404), (1, -0.848, 1.145, -1.259), (1, 3.455, 3.081, 13.153), (1, 2.568, 0.259, 2.914), (1, 2.866, 2.636, 10.642), (1, 2.776, -0.309, 1.626), (1, 2.087, 0.619, 3.031), (1, 1.682, 1.201, 3.967), (1, 3.800, 2.600, 12.399), (1, 3.344, -0.780, 1.347), (1, 1.053, -0.817, -3.346), (1, 0.805, 3.085, 7.865), (1, 0.173, 0.069, -2.449), (1, 2.018, 1.309, 4.964), (1, 3.713, 3.804, 15.838), (1, 3.805, -0.063, 4.421), (1, 3.587, 2.854, 12.738), (1, 2.426, -0.179, 1.315), (1, 0.535, 0.572, -0.213), (1, -0.558, 0.142, -3.690), (1, -0.875, 2.700, 3.349), (1, 2.405, 3.933, 13.610), (1, 1.633, 1.222, 3.934), (1, 0.049, 2.853, 5.657), (1, 1.146, 0.907, 2.015), (1, 0.300, 0.219, -1.744), (1, 2.226, 2.526, 9.029), (1, 2.545, -0.762, -0.198), (1, 2.553, 3.956, 13.974), (1, -0.898, 2.836, 3.713), (1, 3.796, -0.202, 3.985), (1, -0.810, 2.963, 4.268), (1, 0.511, 2.104, 4.334), (1, 3.527, 3.741, 15.275), (1, -0.921, 3.094, 4.440), (1, 0.856, 3.108, 8.036), (1, 0.815, 0.565, 0.323), (1, 3.717, 0.693, 6.512), (1, 3.052, 3.558, 13.778), (1, 2.942, 3.034, 11.986), (1, 0.765, 3.177, 8.061), (1, 3.175, -0.525, 
1.776), (1, 0.309, 1.006, 0.638), (1, 1.922, 0.835, 3.349), (1, 3.678, 3.314, 14.297), (1, 2.840, -0.486, 1.221), (1, 1.195, 3.396, 9.578), (1, -0.157, 3.122, 6.053), (1, 2.404, 1.434, 6.110), (1, 3.108, 2.210, 9.845), (1, 2.289, 1.188, 5.142), (1, -0.319, -0.044, -3.769), (1, -0.625, 3.701, 6.854), (1, 2.269, -0.276, 0.710), (1, 0.777, 1.963, 4.442), (1, 0.411, 1.893, 3.501), (1, 1.173, 0.461, 0.728), (1, 1.767, 3.077, 9.765), (1, 0.853, 3.076, 7.933), (1, -0.013, 3.149, 6.421), (1, 3.841, 1.526, 9.260), (1, -0.950, 0.277, -4.070), (1, -0.644, -0.747, -6.527), (1, -0.923, 1.733, 0.353), (1, 0.044, 3.037, 6.201), (1, 2.074, 2.494, 8.631), (1, 0.016, 0.961, -0.085), (1, -0.780, -0.448, -5.904), (1, 0.170, 1.936, 3.148), (1, -0.420, 3.730, 7.349), (1, -0.630, 1.504, 0.254), (1, -0.006, 0.045, -2.879), (1, 1.101, -0.985, -3.753), (1, 1.618, 0.555, 1.900), (1, -0.336, 1.408, 0.552), (1, 1.086, 3.284, 9.024), (1, -0.815, 2.032, 1.466), (1, 3.144, -0.380, 2.148), (1, 2.326, 2.077, 7.883), (1, -0.571, 0.964, -1.251), (1, 2.416, 1.255, 5.595), (1, 3.964, 1.379, 9.065), (1, 3.897, 1.553, 9.455), (1, 1.806, 2.667, 8.611), (1, 0.323, 3.809, 9.073), (1, 0.501, 3.256, 7.769), (1, -0.679, 3.539, 6.259), (1, 2.825, 3.856, 14.219), (1, 0.288, -0.536, -4.032), (1, 3.009, 0.725, 5.193), (1, -0.763, 1.140, -1.105), (1, 1.124, 3.807, 10.670), (1, 2.478, 0.204, 2.570), (1, 2.825, 2.639, 10.566), (1, 1.878, -0.883, -1.892), (1, 3.380, 2.942, 12.587), (1, 2.202, 1.739, 6.621), (1, -0.711, -0.680, -6.463), (1, -0.266, 1.827, 1.951), (1, -0.846, 1.003, -1.683), (1, 3.201, 0.132, 3.798), (1, 2.797, 0.085, 2.849), (1, 1.632, 3.269, 10.072), (1, 2.410, 2.727, 10.003), (1, -0.624, 0.853, -1.690), (1, 1.314, 3.268, 9.433), (1, -0.395, 0.450, -2.440), (1, 0.992, 3.168, 8.489), (1, 3.355, 2.106, 10.028), (1, 0.509, -0.888, -4.647), (1, 1.007, 0.797, 1.405), (1, 0.045, 0.211, -2.278), (1, -0.911, 1.093, -1.544), (1, 2.409, 0.273, 2.637), (1, 2.640, 3.540, 12.899), (1, 2.668, -0.433, 1.038), (1, -0.014, 0.341, -2.005), (1, -0.525, -0.344, -5.083), (1, 2.278, 3.517, 12.105), (1, 3.712, 0.901, 7.128), (1, -0.689, 2.842, 4.149), (1, -0.467, 1.263, -0.147), (1, 0.963, -0.653, -3.034), (1, 2.559, 2.590, 9.889), (1, 1.566, 1.393, 4.312), (1, -1.000, 1.809, 0.429), (1, -0.297, 3.221, 6.070), (1, 2.199, 3.820, 12.856), (1, 3.096, 3.251, 12.944), (1, 1.479, 1.835, 5.461), (1, 0.276, 0.773, -0.130), (1, 0.607, 1.382, 2.360), (1, 1.169, -0.108, -0.985), (1, 3.429, 0.475, 5.282), (1, 2.626, 0.104, 2.563), (1, 1.156, 3.512, 9.850), (1, 3.947, 0.796, 7.282), (1, -0.462, 2.425, 3.351), (1, 3.957, 0.366, 6.014), (1, 3.763, -0.330, 3.536), (1, 0.667, 3.361, 8.417), (1, -0.583, 0.892, -1.492), (1, -0.505, 1.344, 0.021), (1, -0.474, 2.714, 4.195), (1, 3.455, 0.014, 3.950), (1, 1.016, 1.828, 4.516), (1, 1.845, 0.193, 1.269), (1, -0.529, 3.930, 7.731), (1, 2.636, 0.045, 2.408), (1, 3.757, -0.918, 1.760), (1, -0.808, 1.160, -1.137), (1, 0.744, 1.435, 2.793), (1, 3.457, 3.566, 14.613), (1, 1.061, 3.140, 8.544), (1, 3.733, 3.368, 14.570), (1, -0.969, 0.879, -2.301), (1, 3.940, 3.136, 14.287), (1, -0.730, 2.107, 1.860), (1, 3.699, 2.820, 12.858), (1, 2.197, -0.636, -0.514), (1, 0.775, -0.979, -4.387), (1, 2.019, 2.828, 9.521), (1, 1.415, 0.113, 0.170), (1, 1.567, 3.410, 10.363), (1, 0.984, -0.960, -3.913), (1, 1.809, 2.487, 8.079), (1, 1.550, 1.130, 3.489), (1, -0.770, 3.027, 4.542), (1, -0.358, 3.326, 6.262), (1, 3.140, 0.096, 3.567), (1, -0.685, 2.213, 2.270), (1, 0.916, 0.692, 0.907), (1, 1.526, 1.159, 3.527), (1, 2.675, -0.568, 0.645), (1, 
1.740, 3.019, 9.538), (1, 1.223, 2.088, 5.709), (1, 1.572, -0.125, -0.230), (1, 3.641, 0.362, 5.369), (1, 2.944, 3.897, 14.578), (1, 2.775, 2.461, 9.932), (1, -0.200, 2.492, 4.076), (1, 0.065, 2.055, 3.296), (1, 2.375, -0.639, -0.167), (1, -0.133, 1.138, 0.149), (1, -0.385, 0.163, -3.281), (1, 2.200, 0.863, 3.989), (1, -0.470, 3.492, 6.536), (1, -0.916, -0.547, -6.472), (1, 0.634, 0.927, 1.049), (1, 2.930, 2.655, 10.825), (1, 3.094, 2.802, 11.596), (1, 0.457, 0.539, -0.470), (1, 1.277, 2.229, 6.240), (1, -0.157, 1.270, 0.496), (1, 3.320, 0.640, 5.559), (1, 2.836, 1.067, 5.872), (1, 0.921, -0.716, -3.307), (1, 3.886, 1.487, 9.233), (1, 0.306, -0.142, -2.815), (1, 3.727, -0.410, 3.225), (1, 1.268, -0.801, -2.866), (1, 2.302, 2.493, 9.084), (1, 0.331, 0.373, -1.220), (1, 3.224, -0.857, 0.879), (1, 1.328, 2.786, 8.014), (1, 3.639, 1.601, 9.081), (1, 3.201, -0.484, 1.949), (1, 3.447, -0.734, 1.692), (1, 2.773, -0.143, 2.117), (1, 1.517, -0.493, -1.445), (1, 1.778, -0.428, -0.728), (1, 3.989, 0.099, 5.274), (1, 1.126, 3.985, 11.206), (1, 0.348, 0.756, -0.035), (1, 2.399, 2.576, 9.525), (1, 0.866, 1.800, 4.132), (1, 3.612, 1.598, 9.017), (1, 0.495, 2.239, 4.707), (1, 2.442, 3.712, 13.019), (1, 0.238, -0.844, -5.057), (1, 1.404, 3.095, 9.093), (1, 2.842, 2.044, 8.816), (1, 0.622, 0.322, -0.791), (1, -0.561, 1.242, -0.395), (1, 0.679, 3.822, 9.823), (1, 1.875, 3.526, 11.327), (1, 3.587, 1.050, 7.324), (1, 1.467, 0.588, 1.699), (1, 3.180, 1.571, 8.074), (1, 1.402, 0.430, 1.093), (1, 1.834, 2.209, 7.294), (1, 3.542, -0.259, 3.306), (1, -0.517, 0.174, -3.513), (1, 3.549, 2.210, 10.729), (1, 2.260, 3.393, 11.699), (1, 0.036, 1.893, 2.751), (1, 0.680, 2.815, 6.804), (1, 0.219, 0.368, -1.459), (1, -0.519, 3.987, 7.924), (1, 0.974, 0.761, 1.231), (1, 0.107, 0.620, -0.927), (1, 1.513, 1.910, 5.755), (1, 3.114, 0.894, 5.910), (1, 3.061, 3.052, 12.276), (1, 2.556, 3.779, 13.448), (1, 1.964, 2.692, 9.002), (1, 3.894, -0.032, 4.690), (1, -0.693, 0.910, -1.655), (1, 2.692, 2.908, 11.108), (1, -0.824, 1.190, -1.078), (1, 3.621, 0.918, 6.997), (1, 3.190, 2.442, 10.707), (1, 1.424, -0.546, -1.791), (1, 2.061, -0.427, -0.158), (1, 1.532, 3.158, 9.540), (1, 0.648, 3.557, 8.967), (1, 2.511, 1.665, 7.017), (1, 1.903, -0.168, 0.302), (1, -0.186, -0.718, -5.528), (1, 2.421, 3.896, 13.531), (1, 3.063, 1.841, 8.650), (1, 0.636, 1.699, 3.367), (1, 1.555, 0.688, 2.174), (1, -0.412, 0.454, -2.462), (1, 1.645, 3.207, 9.911), (1, 3.396, 3.766, 15.090), (1, 0.375, -0.256, -3.017), (1, 3.636, 0.732, 6.469), (1, 2.503, 3.133, 11.405), (1, -0.253, 0.693, -1.429), (1, 3.178, 3.110, 12.686), (1, 3.282, -0.725, 1.388), (1, -0.297, 1.222, 0.070), (1, 1.872, 3.211, 10.377), (1, 3.471, 1.446, 8.278), (1, 2.891, 0.197, 3.374), (1, -0.896, 2.198, 1.802), (1, 1.178, -0.717, -2.796), (1, 0.650, 3.371, 8.412), (1, 0.447, 3.248, 7.637), (1, 1.616, -0.109, -0.097), (1, 1.837, 1.092, 3.951), (1, 0.767, 1.384, 2.684), (1, 3.466, -0.600, 2.133), (1, -0.800, -0.734, -6.802), (1, -0.534, 0.068, -3.865), (1, 3.416, -0.459, 2.455), (1, 0.800, -0.132, -1.795), (1, 2.150, 1.190, 4.869), (1, 0.830, 1.220, 2.319), (1, 2.656, 2.587, 10.072), (1, 0.375, -0.219, -2.906), (1, 0.582, -0.637, -3.749), (1, 0.588, -0.723, -3.992), (1, 3.875, 2.126, 11.127), (1, -0.476, 1.909, 1.775), (1, 0.963, 3.597, 9.716), (1, -0.888, 3.933, 7.021), (1, 1.711, -0.868, -2.184), (1, 3.244, 1.990, 9.460), (1, -0.057, 1.537, 1.497), (1, -0.015, 3.511, 7.504), (1, 0.280, 0.582, -0.695), (1, 2.402, 2.731, 9.998), (1, 2.053, 2.253, 7.865), (1, 1.955, 0.172, 1.424), (1, 3.746, 0.872, 
7.107), (1, -0.157, 2.381, 3.829), (1, 3.548, -0.918, 1.340), (1, 2.449, 3.195, 11.482), (1, 1.582, 1.055, 3.329), (1, 1.908, -0.839, -1.700), (1, 2.341, 3.137, 11.091), (1, -0.043, 3.873, 8.532), (1, 0.528, -0.752, -4.198), (1, -0.940, 0.261, -4.098), (1, 2.609, 3.531, 12.812), (1, 2.439, 2.486, 9.336), (1, -0.659, -0.150, -4.768), (1, 2.131, 1.973, 7.181), (1, 0.253, 0.304, -1.583), (1, -0.169, 2.273, 3.480), (1, 1.855, 3.974, 12.631), (1, 0.092, 1.160, 0.666), (1, 3.990, 0.402, 6.187), (1, -0.455, 0.932, -1.113), (1, 2.365, 1.152, 5.185), (1, -0.058, 1.244, 0.618), (1, 0.674, 0.481, -0.209), (1, 3.002, 0.246, 3.743), (1, 1.804, 3.765, 11.902), (1, 3.567, -0.752, 1.876), (1, 0.098, 2.257, 3.968), (1, 0.130, -0.889, -5.409), (1, 0.633, 1.891, 3.940), (1, 0.421, 2.533, 5.440), (1, 2.252, 1.853, 7.063), (1, 3.191, -0.980, 0.443), (1, -0.776, 3.241, 5.171), (1, 0.509, 1.737, 3.229), (1, 3.583, 1.274, 7.986), (1, 1.101, 2.896, 7.891), (1, 3.072, -0.008, 3.120), (1, 2.945, -0.295, 2.006), (1, 3.621, -0.161, 3.760), (1, 1.399, 3.759, 11.075), (1, 3.783, -0.866, 1.968), (1, -0.241, 2.902, 5.225), (1, 1.323, 1.934, 5.449), (1, 1.449, 2.855, 8.464), (1, 0.088, 1.526, 1.753), (1, -1.000, 2.161, 1.485), (1, -0.214, 3.358, 6.647), (1, -0.384, 3.230, 5.921), (1, 3.146, 1.228, 6.975), (1, 1.917, 0.860, 3.415), (1, 1.982, 1.735, 6.167), (1, 1.404, 1.851, 5.360), (1, 2.428, -0.674, -0.166), (1, 2.081, -0.505, -0.352), (1, 0.914, -0.543, -2.802), (1, -0.029, -0.482, -4.506), (1, 0.671, 0.184, -1.105), (1, 1.641, -0.524, -1.292), (1, 1.005, 0.361, 0.094), (1, -0.493, 3.582, 6.760), (2, 3.876, 2.563, 21.500), (2, 0.159, -0.309, 7.986), (2, -0.496, 0.417, 12.998), (2, -0.164, -0.512, 7.092), (2, 0.632, 3.200, 28.571), (2, 3.772, 0.493, 9.188), (2, 2.430, -0.797, 2.789), (2, 3.872, -0.775, 1.475), (2, -0.031, -0.256, 8.495), (2, 2.726, 3.000, 25.271), (2, 1.116, -0.269, 7.269), (2, 0.551, 3.402, 29.860), (2, 0.820, 2.500, 24.179), (2, 1.153, -0.453, 6.131), (2, -0.717, -0.360, 8.556), (2, 0.532, 0.531, 12.654), (2, 2.096, 0.981, 13.791), (2, 0.146, -0.433, 7.259), (2, 1.000, 1.075, 15.452), (2, 2.963, -0.090, 6.495), (2, 1.047, 2.052, 21.267), (2, 0.882, 1.778, 19.785), (2, 1.380, 2.702, 24.832), (2, 1.853, 0.401, 10.554), (2, 2.004, 1.770, 18.618), (2, 3.377, 0.772, 11.253), (2, 1.227, -0.169, 7.759), (2, 0.428, 2.052, 21.885), (2, 0.070, 3.648, 31.816), (2, 0.128, -0.938, 4.244), (2, 2.061, 0.753, 12.454), (2, 1.207, -0.301, 6.989), (2, -0.168, 3.765, 32.757), (2, 3.450, 1.801, 17.353), (2, -0.483, 3.344, 30.547), (2, 1.847, 1.884, 19.455), (2, 3.241, 2.369, 20.975), (2, 0.628, 3.590, 30.912), (2, 2.183, 1.741, 18.263), (2, 0.774, 2.638, 25.057), (2, 3.292, 2.867, 23.912), (2, 0.056, 2.651, 25.850), (2, -0.506, 0.300, 12.308), (2, 0.524, 1.182, 16.570), (2, -0.267, 2.563, 25.647), (2, 3.953, -0.334, 4.040), (2, 2.507, 2.319, 21.408), (2, -0.770, 1.017, 16.875), (2, 0.481, 1.591, 19.062), (2, 3.243, 1.060, 13.114), (2, 2.178, -0.325, 5.873), (2, 2.510, 1.235, 14.900), (2, 2.684, 2.370, 21.535), (2, 3.466, 3.656, 28.469), (2, 2.994, 3.960, 30.764), (2, -0.363, 3.592, 31.917), (2, 1.738, 0.074, 8.708), (2, 1.462, 3.727, 30.902), (2, 0.059, 0.180, 11.021), (2, 2.980, 2.317, 20.925), (2, 1.248, 0.965, 14.545), (2, 0.776, -0.229, 7.850), (2, -0.562, 2.839, 27.598), (2, 3.581, 0.244, 7.883), (2, -0.958, 0.901, 16.362), (2, 3.257, 0.364, 8.925), (2, 1.478, 1.718, 18.827), (2, -0.121, -0.436, 7.507), (2, 0.966, 1.444, 17.697), (2, 3.631, 3.463, 27.144), (2, 0.174, -0.663, 5.848), (2, 2.783, 0.124, 7.959), (2, 1.106, 
-0.936, 3.276), (2, 0.186, -0.942, 4.162), (2, 3.513, 2.456, 21.222), (2, 0.339, 2.316, 23.558), (2, 0.566, 2.515, 24.523), (2, -0.134, 0.746, 14.607), (2, 1.554, 0.106, 9.084), (2, -0.846, 2.748, 27.337), (2, 3.934, 0.564, 9.451), (2, 2.840, -0.966, 1.366), (2, 1.379, 0.307, 10.463), (2, 1.065, -0.780, 4.253), (2, 3.324, 2.145, 19.546), (2, 0.974, -0.543, 5.767), (2, 2.469, 3.976, 31.385), (2, -0.434, 3.689, 32.570), (2, 0.261, 0.481, 12.624), (2, 3.786, 2.605, 21.843), (2, -0.460, -0.536, 7.243), (2, 2.576, 2.880, 24.702), (2, -0.501, 3.551, 31.810), (2, 2.946, 3.263, 26.633), (2, 2.959, -0.813, 2.162), (2, -0.749, 0.490, 13.686), (2, 2.821, 0.335, 9.187), (2, 3.964, 0.272, 7.667), (2, 0.808, -0.700, 4.994), (2, 0.415, 2.183, 22.682), (2, 2.551, 3.785, 30.156), (2, 0.821, 1.120, 15.897), (2, 1.714, 3.019, 26.400), (2, 2.265, 1.950, 19.438), (2, 1.493, 3.317, 28.409), (2, -0.445, 2.282, 24.134), (2, -0.508, 2.508, 25.553), (2, 1.017, -0.621, 5.255), (2, 1.053, 2.246, 22.422), (2, 0.441, 1.637, 19.382), (2, 3.657, 1.246, 13.816), (2, 0.756, 0.808, 14.095), (2, 1.849, 1.599, 17.742), (2, 1.782, -0.000, 8.215), (2, 1.136, 3.940, 32.506), (2, 2.814, 3.288, 26.916), (2, 3.180, 3.198, 26.008), (2, 0.728, -0.054, 8.946), (2, 0.801, 0.775, 13.852), (2, 1.399, -0.546, 5.322), (2, 1.415, 1.753, 19.103), (2, 2.860, 1.796, 17.913), (2, 0.712, 2.902, 26.699), (2, -0.389, 3.093, 28.945), (2, 3.661, 3.666, 28.333), (2, 3.944, 0.996, 12.030), (2, 1.655, 1.385, 16.657), (2, 0.122, -0.662, 5.906), (2, 3.667, 2.763, 22.912), (2, 2.606, 0.630, 11.172), (2, -0.291, 1.492, 19.242), (2, -0.787, 1.223, 18.125), (2, 2.405, 0.325, 9.545), (2, 3.129, -0.412, 4.398), (2, 0.588, 3.964, 33.194), (2, -0.177, 3.636, 31.993), (2, 2.079, 3.280, 27.603), (2, 3.055, 3.958, 30.692), (2, -0.164, 3.188, 29.292), (2, 3.803, 3.151, 25.105), (2, 3.123, -0.891, 1.531), (2, 3.070, -0.824, 1.988), (2, 3.103, -0.931, 1.309), (2, 0.589, 3.353, 29.529), (2, 1.095, 1.973, 20.744), (2, -0.557, 0.370, 12.775), (2, 1.223, 0.307, 10.620), (2, 3.255, -0.768, 2.136), (2, 0.508, 2.157, 22.435), (2, 0.373, 0.319, 11.544), (2, 1.240, 1.736, 19.177), (2, 1.846, 0.970, 13.972), (2, 3.352, -0.534, 3.445), (2, -0.352, -0.290, 8.610), (2, 0.281, 0.193, 10.880), (2, 3.450, -0.059, 6.193), (2, 0.310, 2.575, 25.140), (2, 1.791, 1.127, 14.970), (2, 1.992, 2.347, 22.087), (2, -0.288, 2.881, 27.576), (2, 3.464, 3.664, 28.518), (2, 0.573, 2.789, 26.159), (2, 2.265, 1.583, 17.233), (2, 3.203, 0.730, 11.177), (2, 3.345, 1.368, 14.862), (2, 0.891, 3.690, 31.248), (2, 2.252, -0.311, 5.884), (2, -0.087, 0.804, 14.912), (2, 0.153, 2.510, 24.905), (2, 3.533, -0.965, 0.675), (2, 2.035, 1.953, 19.683), (2, 0.316, 2.448, 24.373), (2, 2.199, 3.858, 30.946), (2, -0.519, 3.647, 32.399), (2, 0.867, 1.961, 20.901), (2, 2.739, 2.268, 20.866), (2, 2.462, -0.664, 3.551), (2, 1.372, 3.419, 29.144), (2, -0.628, 2.723, 26.968), (2, 3.989, -0.225, 4.659), (2, 0.166, 3.190, 28.976), (2, 1.681, 2.937, 25.943), (2, 2.979, 2.263, 20.600), (2, 3.896, -0.419, 3.590), (2, 3.861, 2.224, 19.485), (2, -0.087, -0.861, 4.918), (2, 1.182, 1.886, 20.133), (2, 3.622, 2.320, 20.301), (2, 3.560, 0.008, 6.491), (2, 3.082, -0.605, 3.285), (2, 1.777, 1.324, 16.169), (2, 2.269, 2.436, 22.348), (2, 0.019, 3.074, 28.423), (2, -0.560, 3.868, 33.765), (2, 1.568, 2.886, 25.749), (2, 2.045, 0.222, 9.286), (2, 1.391, 0.352, 10.723), (2, 0.172, 1.908, 21.276), (2, 1.173, -0.726, 4.474), (2, 1.642, 2.576, 23.814), (2, 3.346, 1.377, 14.918), (2, 0.120, 0.411, 12.344), (2, 3.913, 0.820, 11.008), (2, 1.054, 
3.732, 31.340), (2, 2.284, 0.108, 8.362), (2, 2.266, 0.066, 8.131), (2, 3.204, 1.156, 13.735), (2, 3.243, 2.032, 18.947), (2, 3.052, -0.121, 6.221), (2, 1.131, 2.189, 22.000), (2, 2.958, 0.658, 10.990), (2, 1.717, 3.708, 30.530), (2, 2.417, 2.070, 20.004), (2, 2.175, 0.881, 13.110), (2, 0.333, 3.494, 30.629), (2, 3.598, 3.940, 30.044), (2, 3.683, -0.110, 5.660), (2, 2.555, 1.196, 14.620), (2, 1.511, 0.453, 11.206), (2, 0.903, 1.390, 17.439), (2, -0.897, 3.303, 30.716), (2, 0.245, 2.129, 22.527), (2, 1.370, 2.715, 24.923), (2, 1.822, -0.917, 2.676), (2, 2.690, -0.109, 6.657), (2, 0.206, 1.561, 19.162), (2, 3.905, 2.710, 22.357), (2, -0.438, 3.207, 29.678), (2, 0.898, 3.445, 29.772), (2, 1.838, 2.871, 25.385), (2, 0.116, 1.401, 18.292), (2, -0.408, 2.375, 24.656), (2, 1.681, 3.338, 28.349), (2, 1.177, -0.318, 6.914), (2, 1.004, 0.626, 12.753), (2, 2.840, 2.589, 22.691), (2, 1.258, 3.993, 32.700), (2, 2.016, 3.489, 28.920), (2, -0.728, 0.164, 11.713), (2, 0.193, 1.479, 18.682), (2, 2.647, -0.969, 1.541), (2, 3.837, 2.602, 21.773), (2, 0.541, 0.205, 10.690), (2, 0.026, 2.756, 26.511), (2, 0.924, 0.909, 14.530), (2, 0.974, -0.074, 8.581), (2, 0.081, 0.005, 9.948), (2, 1.331, 2.942, 26.320), (2, 2.498, 3.405, 27.934), (2, 3.741, 1.554, 15.581), (2, 3.502, -0.089, 5.964), (2, 3.069, 1.768, 17.539), (2, 3.115, -0.008, 6.839), (2, 3.237, -0.503, 3.745), (2, 0.768, -0.135, 8.420), (2, 0.410, 3.974, 33.437), (2, 0.238, -0.700, 5.564), (2, 3.619, 0.350, 8.482), (2, 3.563, 3.059, 24.788), (2, 2.916, 3.101, 25.691), (2, 0.144, 3.282, 29.549), (2, 1.288, 2.642, 24.565), (2, -0.859, 0.229, 12.234), (2, 1.507, -0.711, 4.229), (2, -0.634, 2.608, 26.281), (2, 2.054, -0.834, 2.942), (2, 0.453, 1.072, 15.980), (2, 3.914, 1.159, 13.039), (2, 0.254, 1.835, 20.758), (2, 1.577, 0.428, 10.991), (2, 1.990, 3.569, 29.421), (2, 1.584, 1.803, 19.234), (2, 0.835, 3.603, 30.785), (2, 0.900, 3.033, 27.296), (2, 1.180, 0.280, 10.499), (2, 2.400, 2.802, 24.409), (2, 0.924, 2.462, 23.851), (2, 2.138, 0.722, 12.192), (2, -0.253, -0.809, 5.401), (2, 3.570, -0.116, 5.733), (2, 0.201, -0.182, 8.708), (2, 2.457, 0.454, 10.267), (2, -0.053, 0.443, 12.709), (2, 2.108, 2.069, 20.309), (2, -0.964, -0.441, 8.318), (2, 1.802, 0.403, 10.614), (2, 3.704, 3.902, 29.711), (2, 1.904, 2.418, 22.603), (2, 2.965, 3.429, 27.606), (2, -0.801, -0.072, 10.370), (2, 3.009, 0.491, 9.937), (2, 2.781, 1.026, 13.376), (2, -0.421, 0.744, 14.883), (2, 3.639, -0.148, 5.476), (2, 0.584, 2.041, 21.663), (2, 1.547, -0.391, 6.107), (2, -0.204, 0.727, 14.564), (2, 0.372, 0.464, 12.410), (2, 1.185, 1.732, 19.207), (2, 3.574, 0.755, 10.954), (2, 2.164, 1.425, 16.385), (2, 1.895, 1.374, 16.351), (2, 2.352, 2.188, 20.779), (2, 0.187, 0.677, 13.874), (2, -0.589, 3.686, 32.703), (2, 3.081, 0.414, 9.403), (2, 3.341, 3.246, 26.137), (2, 0.617, -0.201, 8.174), (2, 1.518, 3.833, 31.481), (2, 2.613, -0.350, 5.286), (2, 3.426, 0.751, 11.082), (2, 2.726, 3.586, 28.787), (2, 2.834, -0.219, 5.855), (2, 1.038, 3.607, 30.605), (2, 0.479, 1.226, 16.874), (2, 1.729, 0.297, 10.053), (2, 0.050, 1.815, 20.841), (2, -0.554, 3.538, 31.782), (2, 2.773, 0.973, 13.064), (2, -0.239, 3.425, 30.786), (2, 3.611, 3.700, 28.590), (2, 1.418, 3.625, 30.332), (2, 1.599, 1.626, 18.156), (2, 1.841, 1.518, 17.269), (2, 1.119, 1.996, 20.856), (2, 2.810, 2.293, 20.947), (2, 1.174, 2.062, 21.198), (2, -0.326, -0.279, 8.655), (2, -0.365, 0.816, 15.259), (2, 1.296, -0.095, 8.132), (2, -0.263, 0.511, 13.327), (2, 1.757, 3.012, 26.314), (2, 1.849, 1.065, 14.539), (2, 1.651, 2.244, 21.814), (2, 3.942, 
1.026, 12.214), (2, 2.314, 1.944, 19.353), (2, 3.055, -0.002, 6.930), (2, 0.402, 1.350, 17.698), (2, 0.004, 2.288, 23.724), (2, 3.265, 2.962, 24.509), (2, 1.044, -0.684, 4.850), (2, -0.280, 2.278, 23.948), (2, 1.216, 0.726, 13.142), (2, 3.181, 3.518, 27.925), (2, 3.199, -0.124, 6.055), (2, 0.510, -0.622, 5.755), (2, 2.920, 1.067, 13.484), (2, 2.573, 1.844, 18.492), (2, 1.155, 3.505, 29.878), (2, 2.033, 1.756, 18.502), (2, 1.312, 0.114, 9.373), (2, -0.823, 3.339, 30.854), (2, 0.287, 3.891, 33.060), (2, -0.621, -0.210, 9.363), (2, 3.734, 1.574, 15.712), (2, -0.932, 0.772, 15.561), (2, -0.719, 1.604, 20.345), (2, -0.555, 0.773, 15.190), (2, -0.744, 3.934, 34.348), (2, 1.671, -0.425, 5.778), (2, 2.754, 2.690, 23.385), (2, 1.826, 2.185, 21.283), (2, 1.970, 0.021, 8.159), (2, 2.882, 3.494, 28.081), (2, 1.668, -0.030, 8.150), (2, 0.472, 2.184, 22.633), (2, 1.656, 3.393, 28.701), (2, -0.069, 2.331, 24.057), (2, 0.075, 1.341, 17.973), (2, 1.836, 0.565, 11.554), (2, -0.235, 0.520, 13.357), (2, 3.620, 3.169, 25.393), (2, 0.401, -0.062, 9.224), (2, 1.503, 1.667, 18.501), (2, 3.727, 1.149, 13.166), (2, 2.777, -0.081, 6.737), (2, 3.914, -0.234, 4.680), (2, 1.765, 0.750, 12.737), (2, 1.746, 1.818, 19.161), (2, 0.019, 2.819, 26.893), (2, 1.068, 1.917, 20.434), (2, 3.035, 3.158, 25.915), (2, 2.012, 0.724, 12.330), (2, 2.597, 2.264, 20.986), (2, 3.428, 3.239, 26.005), (2, -0.016, -0.529, 6.842), (2, 1.314, 0.735, 13.095), (2, 2.832, -0.567, 3.768), (2, -0.296, 2.641, 26.141), (2, 2.863, 3.889, 30.470), (2, 2.849, 3.997, 31.130), (2, 1.660, 1.813, 19.216), (2, 2.798, 0.977, 13.062), (2, 3.935, 0.549, 9.359), (2, 1.002, 3.557, 30.342), (2, 3.052, 2.207, 20.193), (2, 3.455, 0.458, 9.294), (2, 3.312, 2.138, 19.515), (2, 0.292, 0.058, 10.056), (2, 0.050, -0.211, 8.682), (2, -0.215, 1.108, 16.866), (2, -0.169, 0.647, 14.048), (2, 2.546, 0.876, 12.709), (2, -0.911, -0.209, 9.659), (2, 0.950, 2.894, 26.413), (2, -0.512, -0.167, 9.508), (2, 1.821, -0.747, 3.696), (2, 2.257, 3.945, 31.415), (2, 2.398, -0.586, 4.087), (2, 3.051, 0.815, 11.836), (2, 3.399, 2.131, 19.389), (2, 2.982, 1.549, 16.314), (2, -0.790, -0.329, 8.819), (2, 3.797, 0.327, 8.167), (2, 1.838, 0.290, 9.902), (2, 1.906, 1.782, 18.785), (2, 1.330, -0.208, 7.422), (2, -0.217, 0.854, 15.344), (2, 3.310, 1.582, 16.180), (2, 2.965, 0.917, 12.537), (2, 3.558, -0.164, 5.460), (2, -0.841, 2.060, 23.203), (2, 2.892, 2.621, 22.834), (2, -0.011, -0.198, 8.821), (2, -0.430, 2.999, 28.424), (2, -0.584, 0.894, 15.946), (2, 0.033, 1.310, 17.829), (2, 3.044, 0.410, 9.418), (2, 3.932, 0.295, 7.836), (2, 0.394, 1.315, 17.494), (2, 1.424, -0.167, 7.573), (2, 1.676, 1.118, 15.031), (2, 1.821, 0.714, 12.462), (2, 2.688, 1.497, 16.292), (2, 3.960, 2.344, 20.103), (2, -0.787, -0.161, 9.819), (2, 3.538, 3.651, 28.366), (2, -0.338, 0.458, 13.088), (2, -0.146, 3.162, 29.120), (2, 3.124, 3.352, 26.989), (2, -0.189, 3.685, 32.301), (2, 0.396, 1.004, 15.626), (2, -0.171, 2.114, 22.858), (2, 3.736, 0.732, 10.659), (2, 1.259, 2.564, 24.127), (2, -0.263, 2.426, 24.820), (2, 1.558, -0.858, 3.292), (2, 2.882, 1.110, 13.776), (2, 0.039, 1.284, 17.666), (2, 3.074, 2.379, 21.201), (2, -0.523, 0.303, 12.344), (2, 0.363, 1.082, 16.132), (2, 2.925, 2.187, 20.195), (2, 0.595, -0.335, 7.397), (2, 0.062, -0.232, 8.544), (2, 0.877, 2.155, 22.050), (2, -0.256, 2.922, 27.788), (2, 1.813, 3.161, 27.152), (2, 2.177, 2.532, 23.016), (2, -0.051, 0.035, 10.263), (2, 2.688, 3.599, 28.906), (2, 2.539, -0.076, 7.008), (2, 2.563, 1.467, 16.240), (2, -0.755, 2.276, 24.410), (2, 3.092, 0.660, 10.868), (2, 
2.403, 2.693, 23.756), (2, -0.170, 2.178, 23.239), (2, 2.672, -0.603, 3.712), (2, -0.077, -0.493, 7.116), (2, 1.997, 1.934, 19.608), (2, 1.913, -0.792, 3.335), (2, 0.171, -0.329, 7.857), (2, 2.488, 0.171, 8.540), (2, -0.514, 0.331, 12.500), (2, -0.201, 2.484, 25.103), (2, 2.436, 0.032, 7.759), (2, -0.094, 2.530, 25.275), (2, 2.186, 2.591, 23.358), (2, 3.171, -0.766, 2.231), (2, 2.410, 0.183, 8.687), (2, -0.699, -0.329, 8.728), (2, 3.285, 2.252, 20.228), (2, 1.928, -0.059, 7.720), (2, 3.460, 0.399, 8.931), (2, 2.542, 0.224, 8.801), (2, 2.902, 2.101, 19.702), (2, 3.808, 2.528, 21.358), (2, 0.330, 0.642, 13.522), (2, -0.088, 1.286, 17.804), (2, 3.025, 2.354, 21.100), (2, 3.306, 2.049, 18.986), (2, 1.477, 1.720, 18.845), (2, 2.676, 3.601, 28.931), (2, 1.577, 0.170, 9.443), (2, 1.362, 3.534, 29.843), (2, 2.616, 3.106, 26.018), (2, 3.773, 0.378, 8.496), (2, -0.125, 2.057, 22.465), (2, 3.174, 1.382, 15.120), (2, 0.844, 2.058, 21.503); SELECT ANS[1] > -1.1 AND ANS[1] < -0.9 AND ANS[2] > 5.9 AND ANS[2] < 6.1 AND ANS[3] > 9.9 AND ANS[3] < 10.1 FROM -(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id LIMIT 0, 1); +(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id ORDER BY user_id LIMIT 1, 1); SELECT ANS[1] > 1.9 AND ANS[1] < 2.1 AND ANS[2] > 2.9 AND ANS[2] < 3.1 AND ANS[3] > -3.1 AND ANS[3] < -2.9 FROM -(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id LIMIT 1, 1); +(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id ORDER BY user_id LIMIT 0, 1); DROP TABLE defaults; DROP TABLE model; diff --git a/tests/queries/0_stateless/00953_moving_functions.sql b/tests/queries/0_stateless/00953_moving_functions.sql index daaceeeb3ac..b9046158a16 100644 --- a/tests/queries/0_stateless/00953_moving_functions.sql +++ b/tests/queries/0_stateless/00953_moving_functions.sql @@ -24,6 +24,10 @@ INSERT INTO moving_sum_num SELECT * FROM moving_sum_num ORDER BY k,dt FORMAT TabSeparatedWithNames; +-- Result of function 'groupArrayMovingSum' depends on the order of merging +-- aggregate states which is implementation defined in external aggregation. +SET max_bytes_before_external_group_by = 0; + SELECT k, groupArrayMovingSum(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k FORMAT TabSeparatedWithNamesAndTypes; SELECT k, groupArrayMovingSum(3)(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k FORMAT TabSeparatedWithNamesAndTypes; diff --git a/tests/queries/0_stateless/01012_reset_running_accumulate.sql b/tests/queries/0_stateless/01012_reset_running_accumulate.sql index b9336b2f50c..c2c5bf6f87d 100644 --- a/tests/queries/0_stateless/01012_reset_running_accumulate.sql +++ b/tests/queries/0_stateless/01012_reset_running_accumulate.sql @@ -1,3 +1,6 @@ +-- Disable external aggregation because the state is reset for each new block of data in 'runningAccumulate' function. 
+SET max_bytes_before_external_group_by = 0; + SELECT grouping, item, runningAccumulate(state, grouping) diff --git a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql index 3d75fb0ccc9..ae90dc3cc72 100644 --- a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql +++ b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql @@ -1,5 +1,8 @@ SET joined_subquery_requires_alias = 0; SET max_threads = 1; +-- It affects number of read rows and max_rows_to_read. +SET max_bytes_before_external_sort = 0; +SET max_bytes_before_external_group_by = 0; -- incremental streaming usecase -- that has sense only if data filling order has guarantees of chronological order diff --git a/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql b/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql index ba525f30228..c8466b57051 100644 --- a/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql +++ b/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql @@ -1 +1 @@ -select arraySlice(groupArray(x),1,1) as y from (select uniqState(number) as x from numbers(10) group by number); +select arraySlice(groupArray(x), 1, 1) as y from (select uniqState(number) as x from numbers(10) group by number order by number); diff --git a/tests/queries/0_stateless/01090_zookeeper_mutations_and_insert_quorum_long.sql b/tests/queries/0_stateless/01090_zookeeper_mutations_and_insert_quorum_long.sql index db6555e593e..67534a4611e 100644 --- a/tests/queries/0_stateless/01090_zookeeper_mutations_and_insert_quorum_long.sql +++ b/tests/queries/0_stateless/01090_zookeeper_mutations_and_insert_quorum_long.sql @@ -9,7 +9,6 @@ CREATE TABLE mutations_and_quorum2 (`server_date` Date, `something` String) ENGI -- Should not be larger then 600e6 (default timeout in clickhouse-test) SET insert_quorum=2, insert_quorum_parallel=0, insert_quorum_timeout=300e3; -SET insert_keeper_fault_injection_probability=0; INSERT INTO mutations_and_quorum1 VALUES ('2019-01-01', 'test1'), ('2019-02-01', 'test2'), ('2019-03-01', 'test3'), ('2019-04-01', 'test4'), ('2019-05-01', 'test1'), ('2019-06-01', 'test2'), ('2019-07-01', 'test3'), ('2019-08-01', 'test4'), ('2019-09-01', 'test1'), ('2019-10-01', 'test2'), ('2019-11-01', 'test3'), ('2019-12-01', 'test4'); diff --git a/tests/queries/0_stateless/01134_max_rows_to_group_by.sql b/tests/queries/0_stateless/01134_max_rows_to_group_by.sql index bfbc499e1c3..f9ea37cb65a 100644 --- a/tests/queries/0_stateless/01134_max_rows_to_group_by.sql +++ b/tests/queries/0_stateless/01134_max_rows_to_group_by.sql @@ -2,6 +2,9 @@ SET max_block_size = 1; SET max_rows_to_group_by = 10; SET group_by_overflow_mode = 'throw'; +-- Settings 'max_rows_to_group_by' and 'max_bytes_before_external_group_by' are mutually exclusive. 
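-- For context: with group_by_overflow_mode = 'throw' the check below is expected to fail once more
-- than max_rows_to_group_by grouping keys are seen, while 'break' (set further down in this test)
-- stops reading and returns a partial result instead. A minimal sketch of the non-throwing mode
-- (ad-hoc data, illustrative only; settings values are arbitrary):
SELECT number % 100 AS k, count()
FROM numbers(100000)
GROUP BY k
ORDER BY k
SETTINGS max_rows_to_group_by = 10, group_by_overflow_mode = 'break', max_bytes_before_external_group_by = 0;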
+SET max_bytes_before_external_group_by = 0; + SELECT 'test1', number FROM system.numbers GROUP BY number; -- { serverError 158 } SET group_by_overflow_mode = 'break'; diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh index eea9ea5f7e5..f2428141264 100755 --- a/tests/queries/0_stateless/01162_strange_mutations.sh +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-replicated-database +# Tag no-replicated-database: CREATE AS SELECT is disabled CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index 50425eae018..c25cdf4e970 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -29,7 +29,7 @@ create_tables() { groupArray( create1 || toString(number) || create2 || engines[1 + number % length(engines)] || ';\n' || insert1 || toString(number) || insert2 - ), ';\n') FROM numbers($tables) FORMAT TSVRaw;" | $CLICKHOUSE_CLIENT -nm + ), ';\n') FROM numbers($tables) SETTINGS max_bytes_before_external_group_by = 0 FORMAT TSVRaw;" | $CLICKHOUSE_CLIENT -nm } $CLICKHOUSE_CLIENT -q "CREATE DATABASE $db" diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql index 291910ed43f..0d24b238d64 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql @@ -4,6 +4,9 @@ set optimize_distributed_group_by_sharding_key=1; +-- Some queries in this test require sorting after aggregation. +set max_bytes_before_external_group_by = 0; + drop table if exists dist_01247; drop table if exists data_01247; diff --git a/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql b/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql index 267f5585705..3697a167989 100644 --- a/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql +++ b/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql @@ -6,7 +6,12 @@ DROP TABLE IF EXISTS dist; create table data (key String) Engine=Memory(); create table dist (key LowCardinality(String)) engine=Distributed(test_cluster_two_shards, currentDatabase(), data); insert into data values ('foo'); + set distributed_aggregation_memory_efficient=1; + +-- There is an obscure bug in rare corner case. 
+set max_bytes_before_external_group_by = 0; + select * from dist group by key; DROP TABLE data; diff --git a/tests/queries/0_stateless/01451_replicated_detach_drop_and_quorum_long.sql b/tests/queries/0_stateless/01451_replicated_detach_drop_and_quorum_long.sql index eea231c9f58..21b65995482 100644 --- a/tests/queries/0_stateless/01451_replicated_detach_drop_and_quorum_long.sql +++ b/tests/queries/0_stateless/01451_replicated_detach_drop_and_quorum_long.sql @@ -1,6 +1,5 @@ -- Tags: long, replica, no-replicated-database -SET insert_keeper_fault_injection_probability=0; -- disable fault injection; part ids are non-deterministic in case of insert retries SET replication_alter_partitions_sync = 2; @@ -10,7 +9,7 @@ DROP TABLE IF EXISTS replica2; CREATE TABLE replica1 (v UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/01451/quorum', 'r1') order by tuple() settings max_replicated_merges_in_queue = 0; CREATE TABLE replica2 (v UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/01451/quorum', 'r2') order by tuple() settings max_replicated_merges_in_queue = 0; -INSERT INTO replica1 VALUES (0); +INSERT INTO replica1 SETTINGS insert_keeper_fault_injection_probability=0 VALUES (0); SYSTEM SYNC REPLICA replica2; @@ -27,7 +26,7 @@ ALTER TABLE replica2 DROP PARTITION ID 'all'; SET insert_quorum = 2, insert_quorum_parallel = 0; -INSERT INTO replica2 VALUES (1); +INSERT INTO replica2 SETTINGS insert_keeper_fault_injection_probability=0 VALUES (1); SYSTEM SYNC REPLICA replica2; @@ -39,7 +38,7 @@ SELECT COUNT() FROM replica1; SET insert_quorum_parallel=1; -INSERT INTO replica2 VALUES (2); +INSERT INTO replica2 SETTINGS insert_keeper_fault_injection_probability=0 VALUES (2); -- should work, parallel quorum nodes exists only during insert ALTER TABLE replica1 DROP PART 'all_3_3_0'; diff --git a/tests/queries/0_stateless/01451_replicated_detach_drop_part_long.sql b/tests/queries/0_stateless/01451_replicated_detach_drop_part_long.sql index bf7a471fa40..25b2923ddd9 100644 --- a/tests/queries/0_stateless/01451_replicated_detach_drop_part_long.sql +++ b/tests/queries/0_stateless/01451_replicated_detach_drop_part_long.sql @@ -1,7 +1,6 @@ -- Tags: long, replica, no-replicated-database -- Tag no-replicated-database: Fails due to additional replicas or shards -SET insert_keeper_fault_injection_probability=0; -- disable fault injection; part ids are non-deterministic in case of insert retries SET replication_alter_partitions_sync = 2; DROP TABLE IF EXISTS replica1 SYNC; @@ -10,9 +9,9 @@ DROP TABLE IF EXISTS replica2 SYNC; CREATE TABLE replica1 (v UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/'||currentDatabase()||'test/01451/attach', 'r1') order by tuple() settings max_replicated_merges_in_queue = 0; CREATE TABLE replica2 (v UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/'||currentDatabase()||'test/01451/attach', 'r2') order by tuple() settings max_replicated_merges_in_queue = 0; -INSERT INTO replica1 VALUES (0); -INSERT INTO replica1 VALUES (1); -INSERT INTO replica1 VALUES (2); +INSERT INTO replica1 SETTINGS insert_keeper_fault_injection_probability=0 VALUES (0); +INSERT INTO replica1 SETTINGS insert_keeper_fault_injection_probability=0 VALUES (1); +INSERT INTO replica1 SETTINGS insert_keeper_fault_injection_probability=0 VALUES (2); ALTER TABLE replica1 DETACH PART 'all_100_100_0'; -- { serverError 232 } @@ -25,7 +24,7 @@ SELECT v FROM replica1 ORDER BY v; SELECT name FROM system.detached_parts WHERE table = 'replica2' AND database = 
currentDatabase(); -ALTER TABLE replica2 ATTACH PART 'all_1_1_0'; +ALTER TABLE replica2 ATTACH PART 'all_1_1_0' SETTINGS insert_keeper_fault_injection_probability=0; SYSTEM SYNC REPLICA replica1; SELECT v FROM replica1 ORDER BY v; diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas.sh b/tests/queries/0_stateless/01459_manual_write_to_replicas.sh index c05d813ca7f..a9a6d27c145 100755 --- a/tests/queries/0_stateless/01459_manual_write_to_replicas.sh +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas.sh @@ -20,10 +20,6 @@ function thread { for x in {0..99}; do # sometimes we can try to commit obsolete part if fetches will be quite fast, # so supress warning messages like "Tried to commit obsolete part ... covered by ..." - # (2) keeper fault injection for inserts because - # it can be a cause of deduplicated parts be visible to SELECTs for sometime (until cleanup thread remove them), - # so the same SELECT on different replicas can return different results, i.e. test output will be non-deterministic - # (see #9712) $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --query "INSERT INTO r$1 SELECT $x % $NUM_REPLICAS = $1 ? $x - 1 : $x" 2>/dev/null # Replace some records as duplicates so they will be written by other replicas done } diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh index 01c88336282..1f76a2efc6b 100755 --- a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh @@ -24,7 +24,7 @@ function thread { while true; do $CLICKHOUSE_CLIENT --query "DETACH TABLE r$1" $CLICKHOUSE_CLIENT --query "ATTACH TABLE r$1" - $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 0 --insert_keeper_fault_injection_probability=0 --query "INSERT INTO r$1 SELECT $x" 2>&1 | grep -qE "$valid_exceptions_to_retry" || break + $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 0 --query "INSERT INTO r$1 SELECT $x" 2>&1 | grep -qE "$valid_exceptions_to_retry" || break done done } diff --git a/tests/queries/0_stateless/01472_many_rows_in_totals.sql b/tests/queries/0_stateless/01472_many_rows_in_totals.sql index d79d189a28d..bea8c255f21 100644 --- a/tests/queries/0_stateless/01472_many_rows_in_totals.sql +++ b/tests/queries/0_stateless/01472_many_rows_in_totals.sql @@ -1,4 +1,7 @@ +-- Disable external aggregation because it may produce several blocks instead of one. 
+set max_bytes_before_external_group_by = 0; set output_format_write_statistics = 0; + select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format Pretty; select '--'; diff --git a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh index 1ccbe34b10a..22cd6fb8127 100755 --- a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh +++ b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh @@ -20,7 +20,7 @@ done function thread { i=0 retries=300 while [[ $i -lt $retries ]]; do # server can be dead - $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 1 --insert_keeper_fault_injection_probability=0 --query "INSERT INTO r$1 SELECT $2" && break + $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 1 --query "INSERT INTO r$1 SELECT $2" && break ((++i)) sleep 0.1 done diff --git a/tests/queries/0_stateless/01509_check_parallel_quorum_inserts_long.sh b/tests/queries/0_stateless/01509_check_parallel_quorum_inserts_long.sh index 6fbdf42914c..1589f17c752 100755 --- a/tests/queries/0_stateless/01509_check_parallel_quorum_inserts_long.sh +++ b/tests/queries/0_stateless/01509_check_parallel_quorum_inserts_long.sh @@ -21,7 +21,7 @@ done $CLICKHOUSE_CLIENT -n -q "SYSTEM STOP REPLICATION QUEUES r2;" function thread { - $CLICKHOUSE_CLIENT --insert_quorum 2 --insert_quorum_parallel 1 --insert_keeper_fault_injection_probability=0 --query "INSERT INTO r1 SELECT $1" + $CLICKHOUSE_CLIENT --insert_quorum 2 --insert_quorum_parallel 1 --query "INSERT INTO r1 SELECT $1" } for i in $(seq 1 $NUM_INSERTS); do diff --git a/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh b/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh index bf88ad0e0b2..a814759ab10 100755 --- a/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh +++ b/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh @@ -20,10 +20,9 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE parallel_q2 (x UInt64) ENGINE=ReplicatedMerg $CLICKHOUSE_CLIENT -q "SYSTEM STOP REPLICATION QUEUES parallel_q2" -$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO parallel_q1 VALUES (1)" - -# disable keeper fault injection during insert since test checks part names. 
Part names can differ in case of retries during insert -$CLICKHOUSE_CLIENT --insert_quorum 2 --insert_quorum_parallel 1 --insert_keeper_fault_injection_probability=0 --query="INSERT INTO parallel_q1 VALUES (2)" & +# This test depends on part names and those aren't deterministic with faults +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO parallel_q1 VALUES (1)" +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --insert_quorum 2 --insert_quorum_parallel 1 --query="INSERT INTO parallel_q1 VALUES (2)" & part_count=$($CLICKHOUSE_CLIENT --query="SELECT COUNT() FROM system.parts WHERE table='parallel_q1' and database='${CLICKHOUSE_DATABASE}'") diff --git a/tests/queries/0_stateless/01509_parallel_quorum_insert_no_replicas_long.sql b/tests/queries/0_stateless/01509_parallel_quorum_insert_no_replicas_long.sql index 5a23473dd0a..24b368090e7 100644 --- a/tests/queries/0_stateless/01509_parallel_quorum_insert_no_replicas_long.sql +++ b/tests/queries/0_stateless/01509_parallel_quorum_insert_no_replicas_long.sql @@ -16,8 +16,6 @@ CREATE TABLE r2 ( ENGINE = ReplicatedMergeTree('/clickhouse/{database}/01509_parallel_quorum_insert_no_replicas', '2') ORDER BY tuple(); -SET insert_keeper_fault_injection_probability=0; - SET insert_quorum_parallel=1; SET insert_quorum=3; diff --git a/tests/queries/0_stateless/01513_count_without_select_sequence_consistency_zookeeper_long.sql b/tests/queries/0_stateless/01513_count_without_select_sequence_consistency_zookeeper_long.sql index 4a992449a16..f800ff86aa5 100644 --- a/tests/queries/0_stateless/01513_count_without_select_sequence_consistency_zookeeper_long.sql +++ b/tests/queries/0_stateless/01513_count_without_select_sequence_consistency_zookeeper_long.sql @@ -20,7 +20,6 @@ SYSTEM SYNC REPLICA quorum3; SET select_sequential_consistency=0; SET optimize_trivial_count_query=1; SET insert_quorum=2, insert_quorum_parallel=0; -SET insert_keeper_fault_injection_probability=0; SYSTEM STOP FETCHES quorum1; diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql index 3d57518d0f4..b107af07194 100644 --- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql +++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql @@ -12,6 +12,7 @@ optimize table data_01513 final; set max_memory_usage='500M'; set max_threads=1; set max_block_size=500; +set max_bytes_before_external_group_by=0; select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241 } select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=1; diff --git a/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh b/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh index 99025890cb3..edf3683ccba 100755 --- a/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh +++ b/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh @@ -15,6 +15,7 @@ opts=( "--max_block_size=50" "--max_threads=1" "--max_distributed_connections=2" + "--max_bytes_before_external_group_by=0" ) ${CLICKHOUSE_CLIENT} "${opts[@]}" -q "SELECT groupArray(repeat('a', if(_shard_num == 2, 100000, 1))), number%100000 k from remote('127.{2,3}', system.numbers) GROUP BY k LIMIT 10e6" |& { # the query should fail earlier on 127.3 and 
127.2 should not even go to the memory limit exceeded error. diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 30beb29251e..49ef9d8b79f 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -2,8 +2,6 @@ -- Tag no-replicated-database: Fails due to additional replicas or shards -- Tag no-parallel: static zk path -SET insert_keeper_fault_injection_probability=0; -- disable fault injection; part ids are non-deterministic in case of insert retries - DROP TABLE IF EXISTS execute_on_single_replica_r1 SYNC; DROP TABLE IF EXISTS execute_on_single_replica_r2 SYNC; @@ -11,7 +9,7 @@ DROP TABLE IF EXISTS execute_on_single_replica_r2 SYNC; CREATE TABLE execute_on_single_replica_r1 (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01532/execute_on_single_replica', 'r1') ORDER BY tuple() SETTINGS execute_merges_on_single_replica_time_threshold=10; CREATE TABLE execute_on_single_replica_r2 (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01532/execute_on_single_replica', 'r2') ORDER BY tuple() SETTINGS execute_merges_on_single_replica_time_threshold=10; -INSERT INTO execute_on_single_replica_r1 VALUES (1); +INSERT INTO execute_on_single_replica_r1 SETTINGS insert_keeper_fault_injection_probability=0 VALUES (1); SYSTEM SYNC REPLICA execute_on_single_replica_r2; SET optimize_throw_if_noop=1; diff --git a/tests/queries/0_stateless/01556_accurate_cast_or_null.reference b/tests/queries/0_stateless/01556_accurate_cast_or_null.reference index a2ccd5af868..5187a19cc72 100644 --- a/tests/queries/0_stateless/01556_accurate_cast_or_null.reference +++ b/tests/queries/0_stateless/01556_accurate_cast_or_null.reference @@ -36,6 +36,8 @@ 2023-05-30 14:38:20 1970-01-01 00:00:19 1970-01-01 19:26:40 +1970-01-01 00:00:00 +2106-02-07 06:28:15 \N \N \N diff --git a/tests/queries/0_stateless/01556_accurate_cast_or_null.sql b/tests/queries/0_stateless/01556_accurate_cast_or_null.sql index 2fb7b1177e6..15ac71dea93 100644 --- a/tests/queries/0_stateless/01556_accurate_cast_or_null.sql +++ b/tests/queries/0_stateless/01556_accurate_cast_or_null.sql @@ -39,9 +39,12 @@ SELECT accurateCastOrNull(number + 127, 'Int8') AS x FROM numbers (2) ORDER BY x SELECT accurateCastOrNull(-1, 'DateTime'); SELECT accurateCastOrNull(5000000000, 'DateTime'); SELECT accurateCastOrNull('1xxx', 'DateTime'); -select toString(accurateCastOrNull('2023-05-30 14:38:20', 'DateTime'), timezone()); +SELECT toString(accurateCastOrNull('2023-05-30 14:38:20', 'DateTime'), timezone()); SELECT toString(accurateCastOrNull(19, 'DateTime'), 'UTC'); SELECT toString(accurateCastOrNull(70000, 'DateTime'), 'UTC'); +-- need fixed timezone in these two lines +SELECT toString(accurateCastOrNull('1965-05-30 14:38:20', 'DateTime'), timezone()) SETTINGS session_timezone = 'UTC'; +SELECT toString(accurateCastOrNull('2223-05-30 14:38:20', 'DateTime'), timezone()) SETTINGS session_timezone = 'UTC'; SELECT accurateCastOrNull(-1, 'Date'); SELECT accurateCastOrNull(5000000000, 'Date'); diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh index b49042ead9d..0019c714e40 100755 --- a/tests/queries/0_stateless/01563_distributed_query_finish.sh +++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh @@ -28,7 +28,7 @@ opts=( 
"--prefer_localhost_replica=0" ) $CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm < (x IN (select '1')), attr_list) z FROM TESTTABLE ARRAY JOIN z AS attr ORDER BY _id LIMIT 3 BY attr; diff --git a/tests/queries/0_stateless/02286_parallel_final.sh b/tests/queries/0_stateless/02286_parallel_final.sh index de0cca0e966..788b4c0e9b5 100755 --- a/tests/queries/0_stateless/02286_parallel_final.sh +++ b/tests/queries/0_stateless/02286_parallel_final.sh @@ -11,7 +11,7 @@ test_random_values() { create table tbl_8parts_${layers}granules_rnd (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 % 8); insert into tbl_8parts_${layers}granules_rnd select number, 1 from numbers_mt($((layers * 8 * 8192))); optimize table tbl_8parts_${layers}granules_rnd final; - explain pipeline select * from tbl_8parts_${layers}granules_rnd final settings max_threads = 16;" 2>&1 | + explain pipeline select * from tbl_8parts_${layers}granules_rnd final settings max_threads = 16, do_not_merge_across_partitions_select_final = 0;;" 2>&1 | grep -c "CollapsingSortedTransform" } @@ -25,7 +25,7 @@ test_sequential_values() { create table tbl_8parts_${layers}granules_seq (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 / $((layers * 8192)))::UInt64; insert into tbl_8parts_${layers}granules_seq select number, 1 from numbers_mt($((layers * 8 * 8192))); optimize table tbl_8parts_${layers}granules_seq final; - explain pipeline select * from tbl_8parts_${layers}granules_seq final settings max_threads = 8;" 2>&1 | + explain pipeline select * from tbl_8parts_${layers}granules_seq final settings max_threads = 8, do_not_merge_across_partitions_select_final = 0;" 2>&1 | grep -c "CollapsingSortedTransform" } diff --git a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference index 7f4ba0901b6..0b0b4175e1f 100644 --- a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference +++ b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference @@ -33,3 +33,38 @@ Get descendants at first level [] [] [] +Get hierarchy +[] +[1] +[2,1] +[3,1] +[4,2,1] +[] +Get is in hierarchy +0 +1 +1 +1 +1 +0 +Get children +[1] +[2,3] +[4] +[] +[] +[] +Get all descendants +[1,2,3,4] +[2,3,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2,3] +[4] +[] +[] +[] diff --git a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql.j2 similarity index 91% rename from tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql rename to tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql.j2 index a775f0e5cbf..bc13bcfdb09 100644 --- a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql +++ b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql.j2 @@ -7,6 +7,8 @@ CREATE TABLE hierarchy_source_table INSERT INTO hierarchy_source_table VALUES (1, 0), (2, 1), (3, 1), (4, 2); +{% for dictionary_config in ['', 'SHARDS 16'] -%} + DROP DICTIONARY IF EXISTS hierarchy_hashed_array_dictionary; CREATE DICTIONARY hierarchy_hashed_array_dictionary ( @@ -15,7 +17,7 @@ CREATE DICTIONARY hierarchy_hashed_array_dictionary ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'hierarchy_source_table')) 
-LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY({{ dictionary_config }})) LIFETIME(MIN 1 MAX 1000); SELECT 'Get hierarchy'; @@ -29,6 +31,8 @@ SELECT dictGetDescendants('hierarchy_hashed_array_dictionary', number) FROM syst SELECT 'Get descendants at first level'; SELECT dictGetDescendants('hierarchy_hashed_array_dictionary', number, 1) FROM system.numbers LIMIT 6; +{% endfor %} + DROP DICTIONARY hierarchy_hashed_array_dictionary; DROP TABLE hierarchy_source_table; diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference index 60d9fb16c5f..ab6a247219b 100644 --- a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference @@ -106,6 +106,42 @@ Get descendants at first level [] [] [] +HashedArray dictionary +Get hierarchy +[0] +[1,0] +[2,1,0] +[3] +[4,2,1,0] +[] +Get is in hierarchy +1 +1 +1 +1 +1 +0 +Get children +[1] +[2] +[4] +[] +[] +[] +Get all descendants +[1,2,4] +[2,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2] +[4] +[] +[] +[] Cache dictionary Get hierarchy [0] diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql.j2 similarity index 97% rename from tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql rename to tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql.j2 index d477d58d398..b456495513e 100644 --- a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql.j2 @@ -56,7 +56,7 @@ SELECT 'Get descendants at first level'; SELECT dictGetDescendants('hierachical_hashed_dictionary', number, 1) FROM system.numbers LIMIT 6; DROP DICTIONARY hierachical_hashed_dictionary; - +{% for dictionary_config in ['', 'SHARDS 16'] -%} DROP DICTIONARY IF EXISTS hierachical_hashed_array_dictionary; CREATE DICTIONARY hierachical_hashed_array_dictionary ( @@ -64,7 +64,7 @@ CREATE DICTIONARY hierachical_hashed_array_dictionary parent_id Nullable(UInt64) HIERARCHICAL ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table')) -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY({{ dictionary_config }})) LIFETIME(0); SELECT 'HashedArray dictionary'; @@ -82,6 +82,8 @@ SELECT dictGetDescendants('hierachical_hashed_array_dictionary', number, 1) FROM DROP DICTIONARY hierachical_hashed_array_dictionary; +{% endfor %} + DROP DICTIONARY IF EXISTS hierachical_cache_dictionary; CREATE DICTIONARY hierachical_cache_dictionary ( diff --git a/tests/queries/0_stateless/02352_lightweight_delete.reference b/tests/queries/0_stateless/02352_lightweight_delete.reference index 3386b3294c3..ce7c6e81ac8 100644 --- a/tests/queries/0_stateless/02352_lightweight_delete.reference +++ b/tests/queries/0_stateless/02352_lightweight_delete.reference @@ -26,7 +26,7 @@ Rows in parts 800000 Count 700000 First row 300000 1 Do ALTER DELETE mutation that does a "heavyweight" delete -Rows in parts 533333 +Rows in parts 466666 Count 466666 First row 300001 10 Delete 100K more rows using lightweight DELETE diff --git a/tests/queries/0_stateless/02352_rwlock.sh b/tests/queries/0_stateless/02352_rwlock.sh index 7de2c7089b8..08551794c2e 100755 --- a/tests/queries/0_stateless/02352_rwlock.sh 
+++ b/tests/queries/0_stateless/02352_rwlock.sh @@ -21,7 +21,7 @@ function wait_query_by_id_started() # wait for query to be started while [ "$($CLICKHOUSE_CLIENT "$@" -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do if [ "$( - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT --max_bytes_before_external_group_by 0 -nm -q " system flush logs; select count() from system.query_log @@ -56,7 +56,7 @@ while :; do insert_query_id="insert-$(random_str 10)" # 20 seconds sleep - $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 20000000 --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & + $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 20000000 --max_bytes_before_external_group_by 0 --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & if ! wait_query_by_id_started "$insert_query_id"; then wait continue diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql index 3de489b0815..0a3c1f3dcd4 100644 --- a/tests/queries/0_stateless/02366_kql_extend.sql +++ b/tests/queries/0_stateless/02366_kql_extend.sql @@ -12,16 +12,19 @@ -- 'Costco','Snargaluff',200,'2016-09-10', -- ] + DROP TABLE IF EXISTS Ledger; CREATE TABLE Ledger -( +( Supplier Nullable(String), Fruit String , Price Float64, - Purchase Date + Purchase Date ) ENGINE = Memory; INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); +-- This test requires sorting after some of the aggregations, but I don't know KQL, sorry +set max_bytes_before_external_group_by = 0; set dialect = 'kusto'; print '-- extend #1 --'; diff --git a/tests/queries/0_stateless/02366_kql_makeseries.sql b/tests/queries/0_stateless/02366_kql_makeseries.sql index ecf2ef43cc4..c9ca91c0be0 100644 --- a/tests/queries/0_stateless/02366_kql_makeseries.sql +++ b/tests/queries/0_stateless/02366_kql_makeseries.sql @@ -14,31 +14,34 @@ -- ]; DROP TABLE IF EXISTS make_series_test_table; CREATE TABLE make_series_test_table -( +( Supplier Nullable(String), Fruit String , Price Float64, - Purchase Date + Purchase Date ) ENGINE = Memory; INSERT INTO make_series_test_table VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); DROP TABLE IF EXISTS make_series_test_table2; CREATE TABLE make_series_test_table2 -( +( Supplier Nullable(String), Fruit String , Price Int32, - Purchase Int32 + Purchase Int32 ) ENGINE = Memory; INSERT INTO make_series_test_table2 VALUES ('Aldi','Apple',4,10),('Costco','Apple',2,11),('Aldi','Apple',6,10),('Costco','Snargaluff',100,12),('Aldi','Apple',7,12),('Aldi','Snargaluff',400,11),('Costco','Snargaluff',104,12),('Aldi','Apple',5,12),('Aldi','Snargaluff',600,11),('Costco','Snargaluff',200,10); DROP TABLE IF EXISTS
make_series_test_table3; CREATE TABLE make_series_test_table3 -( +( timestamp datetime, metric Float64, ) ENGINE = Memory; INSERT INTO make_series_test_table3 VALUES (parseDateTimeBestEffort('2016-12-31T06:00', 'UTC'), 50), (parseDateTimeBestEffort('2017-01-01', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-02', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-03', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-03T03:00', 'UTC'), 6), (parseDateTimeBestEffort('2017-01-05', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-05T13:40', 'UTC'), 13), (parseDateTimeBestEffort('2017-01-06', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-07', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-08', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-08T21:00', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-09', 'UTC'), 2), (parseDateTimeBestEffort('2017-01-09T12:00', 'UTC'), 11), (parseDateTimeBestEffort('2017-01-10T05:00', 'UTC'), 5); +-- This test requires sorting after some of the aggregations, but I don't know KQL, sorry +set max_bytes_before_external_group_by = 0; set dialect = 'kusto'; + print '-- from to'; make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; print '-- from'; @@ -68,7 +71,7 @@ make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase print '-- without by'; make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; -make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d +make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d -- print '-- summarize --' -- make_series_test_table | summarize count() by format_datetime(bin(Purchase, 1d), 'yy-MM-dd'); diff --git a/tests/queries/0_stateless/02428_combinators_with_over_statement.sql b/tests/queries/0_stateless/02428_combinators_with_over_statement.sql index b42066cdf52..7946b997b00 100644 --- a/tests/queries/0_stateless/02428_combinators_with_over_statement.sql +++ b/tests/queries/0_stateless/02428_combinators_with_over_statement.sql @@ -1,6 +1,6 @@ drop table if exists test; create table test (x AggregateFunction(uniq, UInt64), y Int64) engine=Memory; -insert into test select uniqState(number) as x, number as y from numbers(10) group by number; +insert into test select uniqState(number) as x, number as y from numbers(10) group by number order by x, y; select uniqStateMap(map(1, x)) OVER (PARTITION BY y) from test; select uniqStateForEach([x]) OVER (PARTITION BY y) from test; select uniqStateResample(30, 75, 30)([x], 30) OVER (PARTITION BY y) from test; diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 47a6cf10bda..d5b3ceef46a 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -55,7 +55,15 @@ $CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create databas $CLICKHOUSE_CLIENT -q "system sync database replica $db4" $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db4'" +# Don't throw "replica doesn't exist" when removing all replicas [from a database] +$CLICKHOUSE_CLIENT -q "system drop database replica 'doesntexist$CLICKHOUSE_DATABASE' from
shard 'doesntexist'" + $CLICKHOUSE_CLIENT -q "drop database $db" $CLICKHOUSE_CLIENT -q "drop database $db2" $CLICKHOUSE_CLIENT -q "drop database $db3" + +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db4.rmt (n int) engine=ReplicatedMergeTree order by n" +$CLICKHOUSE_CLIENT -q "system drop replica 'doesntexist$CLICKHOUSE_DATABASE' from database $db4" +$CLICKHOUSE_CLIENT -q "system drop replica 'doesntexist$CLICKHOUSE_DATABASE'" + $CLICKHOUSE_CLIENT -q "drop database $db4" diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh index 5d9844d5030..9ce4b459fce 100755 --- a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh +++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh @@ -42,7 +42,7 @@ TIMEOUT=55 for i in {1..4} do - timeout $TIMEOUT bash -c drop_mv $i & + timeout $TIMEOUT bash -c "drop_mv $i" & done for i in {1..4} diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.sql b/tests/queries/0_stateless/02480_max_map_null_totals.sql index 2c970e25fd5..be2c566ddc1 100644 --- a/tests/queries/0_stateless/02480_max_map_null_totals.sql +++ b/tests/queries/0_stateless/02480_max_map_null_totals.sql @@ -36,4 +36,4 @@ SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numb SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; -SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; \ No newline at end of file +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; diff --git a/tests/integration/test_max_http_connections_for_replication/__init__.py b/tests/queries/0_stateless/02487_create_index_normalize_functions.reference similarity index 100% rename from tests/integration/test_max_http_connections_for_replication/__init__.py rename to tests/queries/0_stateless/02487_create_index_normalize_functions.reference diff --git a/tests/queries/0_stateless/02487_create_index_normalize_functions.sql b/tests/queries/0_stateless/02487_create_index_normalize_functions.sql new file mode 100644 index 00000000000..2155f5d6665 --- /dev/null +++ b/tests/queries/0_stateless/02487_create_index_normalize_functions.sql @@ -0,0 +1,6 @@ + +create table rmt (n int, ts DateTime64(8, 'UTC')) engine=ReplicatedMergeTree('/test/02487/{database}/rmt', '1') order by n; +alter table rmt add index idx1 date(ts) TYPE MinMax GRANULARITY 1; +create index idx2 on rmt date(ts) TYPE MinMax GRANULARITY 1; +system restart replica rmt; +create table rmt2 (n int, ts DateTime64(8, 'UTC'), index idx1 date(ts) TYPE MinMax GRANULARITY 1, index idx2 date(ts) TYPE MinMax GRANULARITY 1) engine=ReplicatedMergeTree('/test/02487/{database}/rmt', '2') order by n; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index b318157835d..e7c169cf45e 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -113,27 +113,26 @@ FROM ) ORDER BY number DESC ) AS t2 +ORDER BY 
t1.number, t2.number -- explain -Expression ((Projection + Before ORDER BY)) - Join (JOIN FillRightFirst) - Expression ((Before JOIN + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Join (JOIN FillRightFirst) + Expression ((Before JOIN + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + Projection))) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Expression ((Joined actions + (Rename joined columns + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) ReadFromSystemNumbers -- execute -0 2 -0 1 0 0 -1 2 -1 1 +0 1 +0 2 1 0 -2 2 -2 1 +1 1 +1 2 2 0 +2 1 +2 2 -- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries -- query SELECT * @@ -193,15 +192,18 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Projection + Before ORDER BY)) - Aggregating - Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery -- query SELECT any(number) @@ -217,15 +219,18 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Projection + Before ORDER BY)) - Aggregating - Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- query with aggregation function but w/o GROUP BY -> remove sorting -- query SELECT sum(number) @@ -315,15 +320,18 @@ FROM GROUP BY number ) WHERE a > 0 +ORDER BY a -- explain -Expression ((Projection + (Before ORDER BY + ))) - Aggregating - Filter - Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + )) + Aggregating + Filter + Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) + ReadFromSystemNumbers -- execute -2 1 +2 -- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. 
But need to correctly update data streams sorting properties after removing sorting steps -- query SELECT * diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 8b529c26d93..c676e0340b1 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -96,7 +96,8 @@ FROM ORDER BY number ASC ) ORDER BY number DESC -) AS t2" +) AS t2 +ORDER BY t1.number, t2.number" run_query "$query" echo "-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries" @@ -138,7 +139,8 @@ FROM ) ORDER BY number DESC ) -GROUP BY number" +GROUP BY number +ORDER BY number" run_query "$query" echo "-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery" @@ -154,7 +156,8 @@ FROM ) ORDER BY number DESC ) -GROUP BY number" +GROUP BY number +ORDER BY number" run_query "$query" echo "-- query with aggregation function but w/o GROUP BY -> remove sorting" @@ -218,7 +221,8 @@ FROM ) GROUP BY number ) -WHERE a > 0" +WHERE a > 0 +ORDER BY a" run_query "$query" echo "-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index ee2099c62ba..16d3327b9c2 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -113,27 +113,26 @@ FROM ) ORDER BY number DESC ) AS t2 +ORDER BY t1.number, t2.number -- explain -Expression ((Project names + (Projection + DROP unused columns after JOIN))) - Join (JOIN FillRightFirst) - Expression ((Change column names to column identifiers + Project names)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + DROP unused columns after JOIN))) + Join (JOIN FillRightFirst) + Expression ((Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))) ReadFromSystemNumbers - Expression ((Change column names to column identifiers + Project names)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + Expression ((Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))) ReadFromSystemNumbers -- execute -0 2 -0 1 0 0 -1 2 -1 1 +0 1 +0 2 1 0 -2 2 -2 1 +1 1 +1 2 2 0 +2 1 +2 2 -- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries -- query SELECT * @@ 
-193,15 +192,18 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Project names + Projection)) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery -- query SELECT any(number) @@ -217,17 +219,20 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Project names + Projection)) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- query with aggregation function but w/o GROUP BY -> remove sorting -- query SELECT sum(number) @@ -319,17 +324,20 @@ FROM GROUP BY number ) WHERE a > 0 +ORDER BY a -- explain -Expression ((Project names + Projection)) - Filter ((WHERE + (Change column names to column identifiers + (Project names + Projection)))) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Filter ((WHERE + (Change column names to column identifiers + (Project names + Projection)))) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers -- execute -2 1 +2 -- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. 
But need to correctly update data streams sorting properties after removing sorting steps -- query SELECT * diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index 3f580763dba..d7623cd5541 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -73,22 +73,24 @@ FROM SELECT DISTINCT number AS n FROM numbers(2) ) as y +ORDER BY x.n, y.n -- explain Expression (Projection) Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - Join (JOIN FillRightFirst) - Expression ((Before JOIN + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + Projection))) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + Join (JOIN FillRightFirst) + Expression ((Before JOIN + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + Projection))) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 0 0 0 1 @@ -106,12 +108,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Projection + (Before ORDER BY + (Projection + (Before ORDER BY + Projection))))) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + Projection)))) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 0 0 1 2 @@ -128,12 +133,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Projection + (Before ORDER BY + (Projection + (Before ORDER BY + Projection))))) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + Projection)))) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 2 0 0 2 1 2 @@ -147,21 +155,23 @@ FROM FROM VALUES('Hello', 'World', 'Goodbye') ) AS words ARRAY JOIN [0, 1] AS arr +ORDER BY c1, arr -- explain Expression (Projection) Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ArrayJoin (ARRAY JOIN) - Expression ((Before ARRAY JOIN + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromStorage (Values) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ArrayJoin (ARRAY JOIN) + Expression ((Before ARRAY JOIN + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (Values) -- execute +Goodbye Hello World -Goodbye -- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs -- query SELECT DISTINCT * @@ -194,16 +204,18 @@ FROM SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities ) WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim'] +ORDER BY cities -- explain Expression (( + 
Projection)) Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - Filter ((WHERE + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromStorage (SystemOne) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + Filter ((WHERE + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemOne) -- execute ['Istanbul','Berlin','Bensheim'] -- GROUP BY before DISTINCT with on the same columns => remove DISTINCT @@ -222,20 +234,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -252,19 +267,22 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 -- GROUP BY WITH ROLLUP before DISTINCT with on different columns => do _not_ remove DISTINCT @@ -283,20 +301,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Rollup - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Rollup + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 36 @@ -316,22 +337,25 @@ FROM FROM 
numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - Rollup - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Rollup + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH CUBE before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -348,20 +372,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Cube - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Cube + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 36 @@ -381,22 +408,25 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - Cube - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Cube + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH TOTALS before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -413,20 +443,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + 
Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 @@ -447,21 +480,24 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 0 -- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT @@ -488,21 +524,23 @@ FROM SELECT DISTINCT number FROM numbers(2) ) +ORDER BY number -- explain Expression (Projection) Distinct - Distinct (Preliminary DISTINCT) - Union - Expression ((Before ORDER BY + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers - Expression (( + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Union + Expression ((Before ORDER BY + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers + Expression (( + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 0 1 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index f07cdca4b5a..c4f0994cd13 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -59,7 +59,8 @@ FROM ( SELECT DISTINCT number AS n FROM numbers(2) -) as y" +) as y +ORDER BY x.n, y.n" run_query "$query" echo "-- DISTINCT duplicates with several columns" @@ -72,7 +73,8 @@ FROM SELECT DISTINCT number as a, 2*number as b FROM numbers(3) ) -)" +) +ORDER BY a, b" run_query "$query" echo "-- DISTINCT duplicates with constant columns" @@ -85,7 +87,8 @@ FROM SELECT DISTINCT 1, number as a, 2*number as b FROM numbers(3) ) -)" +) +ORDER BY a, b" run_query "$query" echo "-- ARRAY JOIN: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs" @@ -95,7 +98,8 @@ FROM SELECT DISTINCT * FROM VALUES('Hello', 'World', 'Goodbye') ) AS words -ARRAY JOIN [0, 1] AS arr" +ARRAY JOIN [0, 1] AS arr +ORDER BY c1, arr" run_query "$query" echo "-- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs" @@ -114,7 +118,8 @@ FROM ( SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities ) -WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim']" +WHERE 
arrayJoin(cities) IN ['Berlin', 'Bensheim'] +ORDER BY cities" run_query "$query" echo "-- GROUP BY before DISTINCT with on the same columns => remove DISTINCT" @@ -132,6 +137,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a )" run_query "$query" @@ -150,6 +156,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a )" run_query "$query" @@ -168,6 +175,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a )" run_query "$query" @@ -186,6 +194,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a )" run_query "$query" @@ -204,6 +213,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a )" run_query "$query" @@ -222,6 +232,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a )" run_query "$query" @@ -240,6 +251,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a )" run_query "$query" @@ -258,6 +270,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a )" run_query "$query" @@ -274,5 +287,6 @@ FROM UNION ALL SELECT DISTINCT number FROM numbers(2) -)" +) +ORDER BY number" run_query "$query" diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference index a5f2c3e5ca3..b79f6310166 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -74,22 +74,25 @@ FROM SELECT DISTINCT number AS n FROM numbers(2) ) as y +ORDER BY x.n, y.n -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + DROP unused columns after JOIN)) - Join (JOIN FillRightFirst) - Expression ((Change column names to column identifiers + Project names)) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers - Expression ((Change column names to column identifiers + Project names)) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression ((Projection + DROP unused columns after JOIN)) + Join (JOIN FillRightFirst) + Expression ((Change column names to column identifiers + Project names)) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers + Expression ((Change column names to column identifiers + Project names)) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 0 0 0 1 @@ -107,12 +110,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change 
column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 0 0 1 2 @@ -129,12 +135,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 2 0 0 2 1 2 @@ -148,21 +157,24 @@ FROM FROM VALUES('Hello', 'World', 'Goodbye') ) AS words ARRAY JOIN [0, 1] AS arr +ORDER BY c1, arr -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression (Projection) - ArrayJoin (ARRAY JOIN) - Expression ((DROP unused columns before ARRAY JOIN + (ARRAY JOIN actions + (Change column names to column identifiers + Project names)))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromStorage (Values) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Projection) + ArrayJoin (ARRAY JOIN) + Expression ((DROP unused columns before ARRAY JOIN + (ARRAY JOIN actions + (Change column names to column identifiers + Project names)))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (Values) -- execute +Goodbye Hello World -Goodbye -- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs -- query SELECT DISTINCT * @@ -196,16 +208,19 @@ FROM SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities ) WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim'] +ORDER BY cities -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression (Projection) - Filter ((WHERE + (Change column names to column identifiers + Project names))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromStorage (SystemOne) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Projection) + Filter ((WHERE + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemOne) -- execute ['Istanbul','Berlin','Bensheim'] -- GROUP BY before DISTINCT with on the same columns => remove DISTINCT @@ -224,20 +239,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - 
Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers +Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -254,19 +272,22 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 -- GROUP BY WITH ROLLUP before DISTINCT with on different columns => do _not_ remove DISTINCT @@ -285,20 +306,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - Rollup - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Rollup + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 36 @@ -318,22 +342,25 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS 
y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - Rollup - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers +Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Rollup + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH CUBE before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -350,20 +377,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - Cube - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Cube + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 36 @@ -383,22 +413,25 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - Cube - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers +Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Cube + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + 
ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH TOTALS before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -415,20 +448,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 @@ -449,21 +485,24 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers +Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 0 -2 1 +2 0 -- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT @@ -490,21 +529,24 @@ FROM SELECT DISTINCT number FROM numbers(2) ) +ORDER BY number -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Union - Expression ((Projection + (Change column names to column identifiers + Project names))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers - Expression (( + ( + Project names))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Union + Expression ((Projection + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to 
column identifiers)) + ReadFromSystemNumbers + Expression (( + ( + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 0 1 diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference index 83571fd9005..86e7e2a6a49 100644 --- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference @@ -29,3 +29,23 @@ ['1'] [] 0 [] [] 3 +--- +[] 0 ['2'] +['0'] 2 ['0'] +['0'] 2 ['0'] +['1'] 1 [] + +[] 3 [] +--- +[] 0 ['2'] 1 +['0'] 2 ['0'] 2 +['1'] 1 [] 0 + +[] 3 [] 3 +--- +[] ['2'] 1 +['0'] ['0'] 2 +['0'] ['0'] 2 +['1'] [] 0 + +[] [] 3 diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql.j2 similarity index 94% rename from tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql rename to tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql.j2 index d39efb0b193..09447dfce65 100644 --- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql.j2 @@ -70,6 +70,12 @@ ALL LEFT JOIN ) AS js2 USING (a) ORDER BY b ASC NULLS FIRST; + + +{% for join_algorithm in ['default', 'partial_merge'] -%} + +SET join_algorithm = '{{ join_algorithm }}'; + SELECT '---'; SELECT * @@ -112,3 +118,5 @@ FULL JOIN ( ON l.item_id = r.item_id ORDER BY 1,2,3 ; + +{% endfor %} diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference index 3b40d9048cd..e60b2a184db 100644 --- a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference +++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference @@ -15,7 +15,7 @@ SELECT 'Count', count() FROM lwd_test_02521; Count 25000 ALTER TABLE lwd_test_02521 DELETE WHERE id >= 40000 SETTINGS mutations_sync = 1; SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; -Rows in parts 40000 +Rows in parts 15000 SELECT 'Count', count() FROM lwd_test_02521; Count 15000 OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1; diff --git a/tests/queries/0_stateless/02567_and_consistency.sql b/tests/queries/0_stateless/02567_and_consistency.sql index 8ad06bd68cb..0eeab99e539 100644 --- a/tests/queries/0_stateless/02567_and_consistency.sql +++ b/tests/queries/0_stateless/02567_and_consistency.sql @@ -5,6 +5,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(sum(number)) +ORDER BY ALL SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; @@ -16,6 +17,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(1) +ORDER BY ALL SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; @@ -27,6 +29,7 @@ FROM ) GROUP BY number HAVING x AND sin(sum(number)) +ORDER BY ALL SETTINGS enable_optimize_predicate_expression = 1; SELECT '====='; @@ -38,6 +41,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(sum(number)) +ORDER BY ALL SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; @@ -57,6 +61,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(sum(number)) +ORDER BY ALL SETTINGS enable_optimize_predicate_expression = 1; select '#45440'; @@ -72,14 +77,18 @@ SELECT NOT h, h IS 
NULL FROM t2 AS left -GROUP BY g; -select '='; +GROUP BY g +ORDER BY g DESC; + +SELECT '='; + SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null FROM t2 AS left - GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 0; -select '='; + GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 0; +SELECT '='; + SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null FROM t2 AS left - GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 1; + GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference index 60ff2d76995..cc74b0237fe 100644 --- a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference +++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference @@ -88,3 +88,4 @@ QUERY id: 0 COLUMN id: 7, column_name: a, result_type: Int32, source_id: 3 CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 1 +1 diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql index eebea322dbf..5dee450086c 100644 --- a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql +++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql @@ -25,4 +25,6 @@ EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 3 AND b = 'an SELECT * FROM 02668_logical_optimizer WHERE a = 2 AND 2 = a; EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 2 AND 2 = a; +SELECT a FROM 02668_logical_optimizer WHERE (b = 'test') AND ('test' = b); + SELECT (k = 3) OR ( (k = 1) OR (k = 2) OR ( (NULL OR 1) = k ) ) FROM ( SELECT materialize(1) AS k ); diff --git a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference index eb79bbc842a..e7f46a974e6 100644 --- a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference +++ b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.reference @@ -75,3 +75,5 @@ QUERY id: 0 LIST id: 6, nodes: 2 COLUMN id: 7, column_name: a, result_type: Nullable(Int32), source_id: 3 CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_3, UInt64_2), constant_value_type: Tuple(UInt8, UInt8, UInt8) +1 +1 diff --git a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql index 07d0b170a02..72ab507f541 100644 --- a/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql +++ b/tests/queries/0_stateless/02702_logical_optimizer_with_nulls.sql @@ -29,4 +29,7 @@ INSERT INTO 02702_logical_optimizer_with_null_column VALUES (1, 'test'), (2, 'te SELECT * FROM 02702_logical_optimizer_with_null_column WHERE a = 1 OR 3 = a OR 2 = a; EXPLAIN QUERY TREE SELECT * FROM 
02702_logical_optimizer_with_null_column WHERE a = 1 OR 3 = a OR 2 = a; +SELECT materialize(1) AS k WHERE NULL OR (0 OR (k = 2) OR (k = CAST(1, 'Nullable(UInt8)') OR k = 3)); +SELECT (k = 2) OR (k = 1) OR ((NULL OR 1) = k) FROM (SELECT 1 AS k); + DROP TABLE 02702_logical_optimizer_with_null_column; diff --git a/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.sql b/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.sql index 7930b2ca0cc..12572982ddd 100644 --- a/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.sql +++ b/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.sql @@ -2,6 +2,6 @@ drop table if exists test ; create table test(str Nullable(String), i Int64) engine=Memory(); insert into test values(null, 1),('', 2),('s', 1); select '-----------String------------'; -select str ,max(i) from test group by str; +select str, max(i) from test group by str order by str nulls first; drop table test; diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql index fa40c96048c..a117378b0bf 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql @@ -1,5 +1,7 @@ -- Tags: zookeeper +DROP TABLE IF EXISTS join_inner_table SYNC; + CREATE TABLE join_inner_table ( id UUID, @@ -77,6 +79,8 @@ ORDER BY is_initial_query, c, query; ---- Query with JOIN +DROP TABLE IF EXISTS join_outer_table SYNC; + CREATE TABLE join_outer_table ( id UUID, diff --git a/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 b/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 index ea979506e07..67e8f098217 100644 --- a/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 +++ b/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 @@ -14,6 +14,7 @@ SET max_memory_usage='4Mi'; 'FLAT(INITIAL_ARRAY_SIZE 3_000_000 MAX_ARRAY_SIZE 3_000_000)', 'HASHED()', 'HASHED_ARRAY()', + 'HASHED_ARRAY(SHARDS 2)', 'SPARSE_HASHED()', 'SPARSE_HASHED(SHARDS 2 /* shards are special, they use threads */)', ] %} diff --git a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh index 6c697095b57..bafab249b47 100755 --- a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh +++ b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-replicated-database +# Tag no-replicated-database: CREATE AS SELECT is disabled CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02833_local_with_dialect.reference b/tests/queries/0_stateless/02833_local_with_dialect.reference index dbb67375997..573541ac970 100644 --- a/tests/queries/0_stateless/02833_local_with_dialect.reference +++ b/tests/queries/0_stateless/02833_local_with_dialect.reference @@ -1,2 +1 @@ 0 -[?2004h[?2004lBye. diff --git a/tests/queries/0_stateless/02833_local_with_dialect.sh b/tests/queries/0_stateless/02833_local_with_dialect.sh index 012a6d91269..de009961cba 100755 --- a/tests/queries/0_stateless/02833_local_with_dialect.sh +++ b/tests/queries/0_stateless/02833_local_with_dialect.sh @@ -6,4 +6,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh -echo "exit" | ${CLICKHOUSE_LOCAL} --query "from s\"SELECT * FROM numbers(1)\"" --dialect prql --interactive +# Remove last line since the good bye message changes depending on the date +echo "exit" | ${CLICKHOUSE_LOCAL} --query "from s\"SELECT * FROM numbers(1)\"" --dialect prql --interactive | head -n -1 diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference new file mode 100644 index 00000000000..bac15838dc2 --- /dev/null +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -0,0 +1,100 @@ +1 +-- { echoOn } + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; +0 2 0 +1 2 0 +2 2 0 +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10 +SETTINGS max_threads = 1; +0 2 0 +1 2 0 +2 2 0 +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 0 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 1 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 2 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 3 + GROUP BY + ac, + nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; +0 2 0 +1 2 0 +2 2 0 diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql new file mode 100644 index 00000000000..c5ab013a198 --- /dev/null +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -0,0 +1,121 @@ +-- Tags: long, no-tsan, no-asan, no-ubsan, no-msan, no-debug + +CREATE TABLE window_funtion_threading +Engine = MergeTree +ORDER BY (ac, nw) +AS SELECT + toUInt64(toFloat32(number % 2) % 20000000) as ac, + toFloat32(1) as wg, + toUInt16(toFloat32(number % 3) % 400) as nw +FROM numbers_mt(10000000); + +SELECT count() FROM (EXPLAIN PIPELINE SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10) where explain ilike '%ScatterByPartitionTransform%' SETTINGS max_threads = 4; + +-- { echoOn } + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw 
+ORDER BY nw ASC, R DESC +LIMIT 10 +SETTINGS max_threads = 1; + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 0 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 1 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 2 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 3 + GROUP BY + ac, + nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; diff --git a/tests/queries/0_stateless/02884_parallel_window_functions_bug.reference b/tests/queries/0_stateless/02884_parallel_window_functions_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02942_window_functions_logical_error.sql b/tests/queries/0_stateless/02884_parallel_window_functions_bug.sql similarity index 54% rename from tests/queries/0_stateless/02942_window_functions_logical_error.sql rename to tests/queries/0_stateless/02884_parallel_window_functions_bug.sql index 1e4371a134f..84bc69e2310 100644 --- a/tests/queries/0_stateless/02942_window_functions_logical_error.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions_bug.sql @@ -1,6 +1,3 @@ -DROP TABLE IF EXISTS posts; -DROP TABLE IF EXISTS post_metrics; - CREATE TABLE IF NOT EXISTS posts ( `page_id` LowCardinality(String), @@ -12,19 +9,7 @@ CREATE TABLE IF NOT EXISTS posts ) ENGINE = ReplacingMergeTree(as_of) PARTITION BY toStartOfMonth(created) -ORDER BY (page_id, post_id) -TTL created + toIntervalMonth(26); - - -INSERT INTO posts SELECT - repeat('a', (number % 10) + 1), - toString(number), - number % 10, - number, - now() - toIntervalMinute(number), - now() -FROM numbers(1000); - +ORDER BY (page_id, post_id); CREATE TABLE IF NOT EXISTS post_metrics ( @@ -37,61 +22,7 @@ CREATE TABLE IF NOT EXISTS post_metrics ) ENGINE = ReplacingMergeTree(as_of) PARTITION BY toStartOfMonth(created) -ORDER BY (page_id, post_id) -TTL created + toIntervalMonth(26); - - -INSERT INTO post_metrics SELECT - repeat('a', (number % 10) + 1), - toString(number), - now() - toIntervalMinute(number), - number * 100, - number * 10, - now() -FROM numbers(1000); - - -SELECT - host_id, - path_id, - max(rank) AS rank -FROM -( - WITH - as_of_posts AS - ( - SELECT - *, - row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num - FROM posts - WHERE (created >= subtractHours(now(), 24)) AND (host_id > 0) - ), - as_of_post_metrics AS - ( - SELECT - *, - row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num - FROM post_metrics - WHERE created >= subtractHours(now(), 24) - ) - SELECT - page_id, - post_id, - host_id, - path_id, - impressions, - clicks, - ntile(20) OVER (PARTITION BY page_id ORDER BY clicks ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS rank - FROM as_of_posts - GLOBAL LEFT JOIN as_of_post_metrics USING (page_id, post_id, row_num) - WHERE (row_num = 1) AND (impressions > 0) -) AS t -WHERE t.rank > 18 -GROUP BY - host_id, - path_id -ORDER BY host_id, path_id; - +ORDER BY (page_id, post_id); INSERT INTO posts SELECT repeat('a', (number % 10) + 1), @@ -102,7 +33,6 
@@ INSERT INTO posts SELECT now() FROM numbers(100000); - INSERT INTO post_metrics SELECT repeat('a', (number % 10) + 1), toString(number), @@ -112,7 +42,6 @@ INSERT INTO post_metrics SELECT now() FROM numbers(100000); - SELECT host_id, path_id, @@ -152,7 +81,4 @@ WHERE t.rank > 18 GROUP BY host_id, path_id -ORDER BY host_id, path_id; - -DROP TABLE posts; -DROP TABLE post_metrics; +FORMAT Null; diff --git a/tests/queries/0_stateless/02887_insert_quorum_wo_keeper_retries.sql b/tests/queries/0_stateless/02887_insert_quorum_wo_keeper_retries.sql index 489d25d7433..3e75d415089 100644 --- a/tests/queries/0_stateless/02887_insert_quorum_wo_keeper_retries.sql +++ b/tests/queries/0_stateless/02887_insert_quorum_wo_keeper_retries.sql @@ -7,6 +7,7 @@ CREATE TABLE quorum1(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{d CREATE TABLE quorum2(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02887/quorum', '2') ORDER BY x; SET insert_keeper_fault_injection_probability=0; +SET insert_keeper_max_retries = 0; SET insert_quorum = 2; system enable failpoint replicated_merge_tree_insert_quorum_fail_0; diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql index 6b2f146efd0..d8bfec12b3a 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql @@ -2,9 +2,9 @@ DROP TABLE IF EXISTS t1 SYNC; DROP TABLE IF EXISTS t2 SYNC; DROP TABLE IF EXISTS t3 SYNC; -CREATE TABLE t1(k UInt32, v String) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r1') ORDER BY k; -CREATE TABLE t2(k UInt32, v String) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r2') ORDER BY k; -CREATE TABLE t3(k UInt32, v String) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r3') ORDER BY k; +CREATE TABLE t1(k UInt32, v String) ENGINE ReplicatedMergeTree('/02898_parallel_replicas/{database}/test_tbl', 'r1') ORDER BY k; +CREATE TABLE t2(k UInt32, v String) ENGINE ReplicatedMergeTree('/02898_parallel_replicas/{database}/test_tbl', 'r2') ORDER BY k; +CREATE TABLE t3(k UInt32, v String) ENGINE ReplicatedMergeTree('/02898_parallel_replicas/{database}/test_tbl', 'r3') ORDER BY k; insert into t1 select number, toString(number) from numbers(1000, 1000); insert into t2 select number, toString(number) from numbers(2000, 1000); @@ -14,7 +14,7 @@ system sync replica t1; system sync replica t2; system sync replica t3; -SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; -- default coordinator SELECT count(), min(k), max(k), avg(k) FROM t1 SETTINGS log_comment='02898_default_190aed82-2423-413b-ad4c-24dcca50f65b'; diff --git a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh index 9c922ec4723..029b4d07ee2 100755 --- a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh +++ b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh @@ -29,7 +29,7 @@ $CLICKHOUSE_CLIENT \ --query_id "${query_id}" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - 
--cluster_for_parallel_replicas "parallel_replicas" \ + --cluster_for_parallel_replicas "test_cluster_one_shard_three_replicas_localhost" \ --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ --parallel_replicas_min_number_of_rows_per_replica 0 \ @@ -62,7 +62,7 @@ $CLICKHOUSE_CLIENT \ --query_id "${query_id}" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - --cluster_for_parallel_replicas "parallel_replicas" \ + --cluster_for_parallel_replicas "test_cluster_one_shard_three_replicas_localhost" \ --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ --parallel_replicas_min_number_of_rows_per_replica 0 \ diff --git a/tests/queries/0_stateless/02919_storage_fuzzjson.reference b/tests/queries/0_stateless/02919_storage_fuzzjson.reference index a134ce52c11..8f4ee4a5615 100644 --- a/tests/queries/0_stateless/02919_storage_fuzzjson.reference +++ b/tests/queries/0_stateless/02919_storage_fuzzjson.reference @@ -1,3 +1,4 @@ 100 100 100 +100 100 diff --git a/tests/queries/0_stateless/02919_storage_fuzzjson.sql b/tests/queries/0_stateless/02919_storage_fuzzjson.sql index 80b4a406a08..bf473f4b6b8 100644 --- a/tests/queries/0_stateless/02919_storage_fuzzjson.sql +++ b/tests/queries/0_stateless/02919_storage_fuzzjson.sql @@ -42,3 +42,24 @@ CREATE TABLE 02919_test_table_reuse_args(str String) ENGINE = FuzzJSON( SELECT count() FROM (SELECT * FROM 02919_test_table_reuse_args LIMIT 100); DROP TABLE IF EXISTS 02919_test_table_reuse_args; + +-- +DROP TABLE IF EXISTS 02919_test_table_invalid_col_type; +CREATE TABLE 02919_test_table_invalid_col_type +( + str Nullable(Int64) +) +ENGINE = FuzzJSON('{"pet":"rat"}', NULL); -- { serverError BAD_ARGUMENTS } + +DROP TABLE IF EXISTS 02919_test_table_invalid_col_type; + +-- +DROP TABLE IF EXISTS 02919_test_multi_col; +CREATE TABLE 02919_test_multi_col +( + str1 String, + str2 String +) ENGINE = FuzzJSON('{"pet":"rat"}', 999); + +SELECT count(str1), count(str2) FROM (SELECT str1, str2 FROM 02919_test_multi_col LIMIT 100); +DROP TABLE IF EXISTS 02919_test_multi_col; diff --git a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql index 987515527f0..a064c091df0 100644 --- a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql +++ b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql @@ -11,7 +11,7 @@ SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=2, use_hedged_requests=0, - cluster_for_parallel_replicas='parallel_replicas', + cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree=1 ; diff --git a/tests/queries/0_stateless/02932_apply_deleted_mask.reference b/tests/queries/0_stateless/02932_apply_deleted_mask.reference new file mode 100644 index 00000000000..22499472f84 --- /dev/null +++ b/tests/queries/0_stateless/02932_apply_deleted_mask.reference @@ -0,0 +1,15 @@ +Inserted +100 4950 +10 100 0 +Lighweight deleted +86 4271 +10 100 10 +Mask applied +86 4271 +10 86 0 +Lighweight deleted +72 3578 +10 86 10 +Mask applied in partition +72 3578 +10 84 9 diff --git a/tests/queries/0_stateless/02932_apply_deleted_mask.sql b/tests/queries/0_stateless/02932_apply_deleted_mask.sql new file mode 100644 index 00000000000..0ada0640a8f --- /dev/null +++ b/tests/queries/0_stateless/02932_apply_deleted_mask.sql @@ -0,0 +1,43 @@ +DROP TABLE IF 
EXISTS t_materialize_delete; + +CREATE TABLE t_materialize_delete (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id PARTITION BY id % 10; + +SET mutations_sync = 2; + +INSERT INTO t_materialize_delete SELECT number, number FROM numbers(100); + +SELECT 'Inserted'; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +SELECT 'Lighweight deleted'; + +DELETE FROM t_materialize_delete WHERE id % 7 = 3; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +SELECT 'Mask applied'; + +ALTER TABLE t_materialize_delete APPLY DELETED MASK; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +SELECT 'Lighweight deleted'; + +DELETE FROM t_materialize_delete WHERE id % 7 = 4; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +SELECT 'Mask applied in partition'; + +ALTER TABLE t_materialize_delete APPLY DELETED MASK IN PARTITION 5; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +DROP TABLE t_materialize_delete; diff --git a/tests/queries/0_stateless/02932_lwd_and_mutations.reference b/tests/queries/0_stateless/02932_lwd_and_mutations.reference new file mode 100644 index 00000000000..dc0d3536b8f --- /dev/null +++ b/tests/queries/0_stateless/02932_lwd_and_mutations.reference @@ -0,0 +1,14 @@ +900 0 [1,2,3,4,5,6,7,8,9] +1 1000 1 +800 200 [2,3,4,5,6,7,8,9] +1 800 0 +700 150 [3,4,5,6,7,8,9] +1 800 1 +600 300 [4,5,6,7,8,9] +1 600 0 +400 200 [6,7,8,9] +1 500 1 +200 100 [8,9] +1 300 1 +200 100 [8,9] +1 200 0 diff --git a/tests/queries/0_stateless/02932_lwd_and_mutations.sql b/tests/queries/0_stateless/02932_lwd_and_mutations.sql new file mode 100644 index 00000000000..a68aca91764 --- /dev/null +++ b/tests/queries/0_stateless/02932_lwd_and_mutations.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t_lwd_mutations; + +CREATE TABLE t_lwd_mutations(id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id; +INSERT INTO t_lwd_mutations SELECT number, 0 FROM numbers(1000); + +SET mutations_sync = 2; + +DELETE FROM t_lwd_mutations WHERE id % 10 = 0; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations UPDATE v = 1 WHERE id % 4 = 0, DELETE WHERE id % 10 = 1; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +DELETE FROM t_lwd_mutations WHERE id % 10 = 2; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND 
table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations UPDATE v = 1 WHERE id % 4 = 1, DELETE WHERE id % 10 = 3; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations UPDATE _row_exists = 0 WHERE id % 10 = 4, DELETE WHERE id % 10 = 5; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations DELETE WHERE id % 10 = 6, UPDATE _row_exists = 0 WHERE id % 10 = 7; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations APPLY DELETED MASK; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +DROP TABLE IF EXISTS t_lwd_mutations; diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference new file mode 100644 index 00000000000..7a39a221e08 --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.reference @@ -0,0 +1,35 @@ +-- Negative tests +-- Regular cases +a a- a +A A- A +-- --- -- +London London- London +Lloyd-Atkinson Lloyd-Atkinson- Lloyd-Atkinson +This has spaces This has spaces- This has spaces +-> $1.00 <- -> $1.00 <-- -> $1.00 <- +а 80a а +ü tda ü +α mxa α +例 fsq 例 +😉 n28h 😉 +αβγ mxacd αβγ +München Mnchen-3ya München +Mnchen-3ya Mnchen-3ya- Mnchen-3ya +München-Ost Mnchen-Ost-9db München-Ost +Bahnhof München-Ost Bahnhof Mnchen-Ost-u6b Bahnhof München-Ost +abæcdöef abcdef-qua4k abæcdöef +правда 80aafi6cg правда +ยจฆฟคฏข 22cdfh1b8fsa ยจฆฟคฏข +ドメイン名例 eckwd4c7cu47r2wf ドメイン名例 +MajiでKoiする5秒前 MajiKoi5-783gue6qz075azm5e MajiでKoiする5秒前 +「bücher」 bcher-kva8445foa 「bücher」 +团淄 3bs854c 团淄 +-- Special cases + + +\N +\N +Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. Wenn Sie ... vom Hauptbahnhof in Mnchen ... 
mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. 
+München Mnchen-3ya München +abc abc- abc +aäoöuü aou-qla5gqb aäoöuü diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql new file mode 100644 index 00000000000..dd18a43ecc9 --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.sql @@ -0,0 +1,63 @@ +-- Tags: no-fasttest +-- no-fasttest: requires idna library + +SELECT '-- Negative tests'; + +SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN } +SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN } + +SELECT '-- Regular cases'; + +-- The test cases originate from the ada idna unit tests: +--- https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt + +SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; + +SELECT '-- Special cases'; + +SELECT punycodeDecode(''); +SELECT punycodeEncode(''); +SELECT punycodeDecode(NULL); +SELECT punycodeEncode(NULL); + +-- 
garbage Punycode-encoded values +SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS } + +-- long input +SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; + +-- non-const values +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str; +INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München'); +SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original FROM tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02932_query_settings_max_size_drop.reference b/tests/queries/0_stateless/02932_query_settings_max_size_drop.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02932_query_settings_max_size_drop.sql b/tests/queries/0_stateless/02932_query_settings_max_size_drop.sql new file mode 100644 index 00000000000..1685861bd2e --- /dev/null +++ b/tests/queries/0_stateless/02932_query_settings_max_size_drop.sql @@ -0,0 +1,31 @@ +CREATE TABLE test_max_size_drop +Engine = MergeTree() +ORDER BY number +AS SELECT number +FROM numbers(1000) +; + +DROP TABLE test_max_size_drop SETTINGS max_table_size_to_drop = 1; -- { serverError 359 } +DROP TABLE test_max_size_drop; + +CREATE TABLE test_max_size_drop +Engine = MergeTree() +ORDER BY number +AS SELECT number +FROM numbers(1000) +; + +ALTER TABLE test_max_size_drop DROP PARTITION tuple() SETTINGS max_partition_size_to_drop = 1; -- { serverError 359 } +ALTER TABLE test_max_size_drop DROP PARTITION tuple(); +DROP TABLE test_max_size_drop; + +CREATE TABLE test_max_size_drop +Engine = MergeTree() +ORDER BY number +AS SELECT number +FROM numbers(1000) +; + +ALTER TABLE test_max_size_drop DROP PART 'all_1_1_0' SETTINGS max_partition_size_to_drop = 1; -- { serverError 359 } +ALTER TABLE test_max_size_drop DROP PART 'all_1_1_0'; +DROP TABLE test_max_size_drop; diff --git a/tests/queries/0_stateless/02933_paste_join.reference b/tests/queries/0_stateless/02933_paste_join.reference new file mode 100644 index 00000000000..84ae5987926 --- /dev/null +++ b/tests/queries/0_stateless/02933_paste_join.reference @@ -0,0 +1,74 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +0 9 +1 8 +2 7 +3 6 +4 5 +5 4 +6 3 +7 2 +8 1 +9 0 +1 2 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 0 +7 1 +8 2 +9 3 +10 4 +0 0 +1 1 +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +4 4 4 4 +5 5 5 5 +6 6 6 6 +7 7 7 7 +8 8 8 8 +9 9 9 9 +10 10 10 10 +11 11 11 11 +12 12 12 12 +13 13 13 13 +14 14 14 14 +15 15 15 15 
+16 16 16 16 +17 17 17 17 +18 18 18 18 +19 19 19 19 +20 20 20 20 +21 21 21 21 +22 22 22 22 +23 23 23 23 +24 24 24 24 +25 25 25 25 +26 26 26 26 +27 27 27 27 +28 28 28 28 +29 29 29 29 +UInt64 +UInt64 +UInt64 +UInt64 +UInt64 +UInt64 +UInt64 +UInt64 +UInt64 +UInt64 diff --git a/tests/queries/0_stateless/02933_paste_join.sql b/tests/queries/0_stateless/02933_paste_join.sql new file mode 100644 index 00000000000..1c346438d77 --- /dev/null +++ b/tests/queries/0_stateless/02933_paste_join.sql @@ -0,0 +1,37 @@ +select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10)) t2; +select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10) order by a desc) t2; +create table if not exists test (num UInt64) engine=Memory; +insert into test select number from numbers(6); +insert into test select number from numbers(5); +SELECT * FROM (SELECT 1) t1 PASTE JOIN (SELECT 2) SETTINGS joined_subquery_requires_alias=0; +select * from (SELECT number as a FROM numbers(11)) t1 PASTE JOIN test t2 SETTINGS max_threads=1; +select * from (SELECT number as a FROM numbers(11)) t1 PASTE JOIN (select * from test limit 2) t2 SETTINGs max_threads=1; +CREATE TABLE t1 (a UInt64, b UInt64) ENGINE = Memory; +INSERT INTO t1 SELECT number, number FROM numbers(0, 3); +INSERT INTO t1 SELECT number, number FROM numbers(3, 2); +INSERT INTO t1 SELECT number, number FROM numbers(5, 7); +INSERT INTO t1 SELECT number, number FROM numbers(12, 2); +INSERT INTO t1 SELECT number, number FROM numbers(14, 1); +INSERT INTO t1 SELECT number, number FROM numbers(15, 2); +INSERT INTO t1 SELECT number, number FROM numbers(17, 1); +INSERT INTO t1 SELECT number, number FROM numbers(18, 2); +INSERT INTO t1 SELECT number, number FROM numbers(20, 2); +INSERT INTO t1 SELECT number, number FROM numbers(22, 2); +INSERT INTO t1 SELECT number, number FROM numbers(24, 2); +INSERT INTO t1 SELECT number, number FROM numbers(26, 2); +INSERT INTO t1 SELECT number, number FROM numbers(28, 2); + + +CREATE TABLE t2 (a UInt64, b UInt64) ENGINE = Memory; +INSERT INTO t2 SELECT number, number FROM numbers(0, 2); +INSERT INTO t2 SELECT number, number FROM numbers(2, 3); +INSERT INTO t2 SELECT number, number FROM numbers(5, 5); +INSERT INTO t2 SELECT number, number FROM numbers(10, 5); +INSERT INTO t2 SELECT number, number FROM numbers(15, 15); + +SELECT * FROM ( SELECT * from t1 ) t1 PASTE JOIN ( SELECT * from t2 ) t2 SETTINGS max_threads = 1; +SELECT toTypeName(a) FROM (SELECT number as a FROM numbers(11)) t1 PASTE JOIN (select number as a from numbers(10)) t2 SETTINGS join_use_nulls = 1; +SET max_threads = 2; +select * from (SELECT number as a FROM numbers(10)) t1 ANY PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } +select * from (SELECT number as a FROM numbers(10)) t1 ALL PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } +select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=3; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -0,0 +1 @@ +1 diff --git 
a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh new file mode 100755 index 00000000000..c295f5be43b --- /dev/null +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: replica + +# CREATE AS SELECT for Replicated database is broken (https://github.com/ClickHouse/ClickHouse/issues/35408). +# This should be fixed and this test should eventually be deleted. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" +# Non-replicated engines are allowed +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" +# Replicated storages are forbidden +${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" diff --git a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.reference b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.reference new file mode 100644 index 00000000000..f08b8ee767b --- /dev/null +++ b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.reference @@ -0,0 +1,5 @@ +thread = 0 +thread != 0 +Send signal to +thread_name = 'foo' +Send signal to 0 threads (total) diff --git a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh new file mode 100755 index 00000000000..0e23bb6c42b --- /dev/null +++ b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# NOTE: due to the grep below, 'Cannot obtain a stack trace for thread {}' messages will be ignored automatically, which is the intention.
+ +# no message at all +echo "thread = 0" +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id = 0" |& grep -F -o 'Send signal to' + +# send messages to some threads +echo "thread != 0" +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id != 0 format Null" |& grep -F -o 'Send signal to' | grep -v 'Send signal to 0 threads (total)' + +# there is no thread with comm="foo", so no signals will be sent +echo "thread_name = 'foo'" +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_name = 'foo' format Null" |& grep -F -o 'Send signal to 0 threads (total)' diff --git a/tests/queries/0_stateless/02942_window_functions_logical_error.reference b/tests/queries/0_stateless/02942_window_functions_logical_error.reference deleted file mode 100644 index 73f8351d9df..00000000000 --- a/tests/queries/0_stateless/02942_window_functions_logical_error.reference +++ /dev/null @@ -1,216 +0,0 @@ -1 901 19 -1 911 19 -1 921 19 -1 931 19 -1 941 19 -1 951 20 -1 961 20 -1 971 20 -1 981 20 -1 991 20 -2 902 19 -2 912 19 -2 922 19 -2 932 19 -2 942 19 -2 952 20 -2 962 20 -2 972 20 -2 982 20 -2 992 20 -3 903 19 -3 913 19 -3 923 19 -3 933 19 -3 943 19 -3 953 20 -3 963 20 -3 973 20 -3 983 20 -3 993 20 -4 904 19 -4 914 19 -4 924 19 -4 934 19 -4 944 19 -4 954 20 -4 964 20 -4 974 20 -4 984 20 -4 994 20 -5 905 19 -5 915 19 -5 925 19 -5 935 19 -5 945 19 -5 955 20 -5 965 20 -5 975 20 -5 985 20 -5 995 20 -6 906 19 -6 916 19 -6 926 19 -6 936 19 -6 946 19 -6 956 20 -6 966 20 -6 976 20 -6 986 20 -6 996 20 -7 907 19 -7 917 19 -7 927 19 -7 937 19 -7 947 19 -7 957 20 -7 967 20 -7 977 20 -7 987 20 -7 997 20 -8 908 19 -8 918 19 -8 928 19 -8 938 19 -8 948 19 -8 958 20 -8 968 20 -8 978 20 -8 988 20 -8 998 20 -9 909 19 -9 919 19 -9 929 19 -9 939 19 -9 949 19 -9 959 20 -9 969 20 -9 979 20 -9 989 20 -9 999 20 -1 1301 19 -1 1311 19 -1 1321 19 -1 1331 19 -1 1341 19 -1 1351 19 -1 1361 19 -1 1371 20 -1 1381 20 -1 1391 20 -1 1401 20 -1 1411 20 -1 1421 20 -1 1431 20 -2 1302 19 -2 1312 19 -2 1322 19 -2 1332 19 -2 1342 19 -2 1352 19 -2 1362 19 -2 1372 20 -2 1382 20 -2 1392 20 -2 1402 20 -2 1412 20 -2 1422 20 -2 1432 20 -3 1303 19 -3 1313 19 -3 1323 19 -3 1333 19 -3 1343 19 -3 1353 19 -3 1363 19 -3 1373 20 -3 1383 20 -3 1393 20 -3 1403 20 -3 1413 20 -3 1423 20 -3 1433 20 -4 1304 19 -4 1314 19 -4 1324 19 -4 1334 19 -4 1344 19 -4 1354 19 -4 1364 19 -4 1374 20 -4 1384 20 -4 1394 20 -4 1404 20 -4 1414 20 -4 1424 20 -4 1434 20 -5 1305 19 -5 1315 19 -5 1325 19 -5 1335 19 -5 1345 19 -5 1355 19 -5 1365 19 -5 1375 20 -5 1385 20 -5 1395 20 -5 1405 20 -5 1415 20 -5 1425 20 -5 1435 20 -6 1306 19 -6 1316 19 -6 1326 19 -6 1336 19 -6 1346 19 -6 1356 19 -6 1366 19 -6 1376 20 -6 1386 20 -6 1396 20 -6 1406 20 -6 1416 20 -6 1426 20 -6 1436 20 -7 1307 19 -7 1317 19 -7 1327 19 -7 1337 19 -7 1347 19 -7 1357 19 -7 1367 19 -7 1377 20 -7 1387 20 -7 1397 20 -7 1407 20 -7 1417 20 -7 1427 20 -7 1437 20 -8 1308 19 -8 1318 19 -8 1328 19 -8 1338 19 -8 1348 19 -8 1358 19 -8 1368 19 -8 1378 20 -8 1388 20 -8 1398 20 -8 1408 20 -8 1418 20 -8 1428 20 -8 1438 20 -9 1309 19 -9 1319 19 -9 1329 19 -9 1339 19 -9 1349 19 -9 1359 19 -9 1369 19 -9 1379 20 -9 1389 20 -9 1399 20 -9 1409 20 -9 1419 20 -9 1429 20 -9 1439 20 diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference new file mode 100644 index 
00000000000..48d828b6924 --- /dev/null +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -0,0 +1,84 @@ +-- no modifiers +A 2 +B 3 +C \N +D 1 +1 D +2 A +3 B +\N C +A 2 +B 3 +C \N +D 1 +1 D +2 A +3 B +\N C +-- with ASC/DESC modifiers +A 2 +B 3 +C \N +D 1 +D 1 +C \N +B 3 +A 2 +A 2 +B 3 +C \N +D 1 +D 1 +C \N +B 3 +A 2 +-- with NULLS FIRST/LAST modifiers +\N C +1 D +2 A +3 B +1 D +2 A +3 B +\N C +\N C +1 D +2 A +3 B +1 D +2 A +3 B +\N C +-- what happens if some column "all" already exists? +B 3 10 +D 1 20 +A 2 30 +C \N 40 +B 3 10 +D 1 20 +A 2 30 +C \N 40 +D 1 +A 2 +B 3 +C \N +D 1 +A 2 +B 3 +C \N +A 2 +B 3 +D 1 +\N +A 2 +B 3 +D 1 +\N +B 3 10 +D 1 20 +A 2 30 +C \N 40 +B 3 10 +D 1 20 +A 2 30 +C \N 40 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql new file mode 100644 index 00000000000..0756563946c --- /dev/null +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -0,0 +1,89 @@ +-- Tests that sort expression ORDER BY ALL + +DROP TABLE IF EXISTS order_by_all; + +CREATE TABLE order_by_all +( + a String, + b Nullable(Int32), + all UInt64, +) +ENGINE = Memory; + +INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); + +SELECT '-- no modifiers'; + +SET allow_experimental_analyzer = 0; +SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT b, a FROM order_by_all ORDER BY ALL; + +SET allow_experimental_analyzer = 1; +SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT b, a FROM order_by_all ORDER BY ALL; + +SELECT '-- with ASC/DESC modifiers'; + +SET allow_experimental_analyzer = 0; +SELECT a, b FROM order_by_all ORDER BY ALL ASC; +SELECT a, b FROM order_by_all ORDER BY ALL DESC; + +SET allow_experimental_analyzer = 1; +SELECT a, b FROM order_by_all ORDER BY ALL ASC; +SELECT a, b FROM order_by_all ORDER BY ALL DESC; + +SELECT '-- with NULLS FIRST/LAST modifiers'; + +SET allow_experimental_analyzer = 0; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; + +SET allow_experimental_analyzer = 1; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; + +SELECT '-- what happens if some column "all" already exists?'; + +-- columns + +SET allow_experimental_analyzer = 0; +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 1; +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +-- column aliases + +SET allow_experimental_analyzer = 0; +SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 1; +SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + 
+-- expressions + +SET allow_experimental_analyzer = 0; +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 1; +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 0; +SELECT a, b, all FROM order_by_all ORDER BY all, a; + +SET allow_experimental_analyzer = 1; +SELECT a, b, all FROM order_by_all ORDER BY all, a; + +DROP TABLE order_by_all; diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference new file mode 100644 index 00000000000..8620171cb99 --- /dev/null +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -0,0 +1,20 @@ +100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 +0 +10 +98 +set max_size from 100 to 10 +10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 +1 +8 +set max_size from 10 to 100 +100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 +10 +98 +set max_elements from 10 to 2 +100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 +2 +18 +set max_elements from 2 to 10 +100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 +10 +98 diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh new file mode 100755 index 00000000000..2e344a6b6e5 --- /dev/null +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +disk_name="s3_cache_02944" + +$CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT -nm --query " +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String) engine=MergeTree() ORDER BY tuple() SETTINGS disk = '$disk_name'; +INSERT INTO test SELECT randomString(100); +SYSTEM DROP FILESYSTEM CACHE; +" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +config_path=/etc/clickhouse-server/config.d/storage_conf_02944.xml +config_path_tmp=$config_path.tmp + +echo 'set max_size from 100 to 10' +cat $config_path \ +| sed "s|100<\/max_size>|10<\/max_size>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +echo 'set max_size from 10 to 100' +cat $config_path \ +| sed "s|10<\/max_size>|100<\/max_size>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +echo 'set max_elements from 10 to 2' +cat $config_path \ +| sed "s|10<\/max_elements>|2<\/max_elements>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +echo 'set max_elements from 2 to 10' +cat $config_path \ +| sed "s|2<\/max_elements>|10<\/max_elements>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" diff --git a/tests/queries/0_stateless/02946_literal_alias_misclassification.reference b/tests/queries/0_stateless/02946_literal_alias_misclassification.reference new file mode 100644 index 00000000000..d8e5a437352 --- /dev/null +++ b/tests/queries/0_stateless/02946_literal_alias_misclassification.reference @@ -0,0 +1,2 @@ +const 1 +const 2 diff --git 
a/tests/queries/0_stateless/02946_literal_alias_misclassification.sql b/tests/queries/0_stateless/02946_literal_alias_misclassification.sql new file mode 100644 index 00000000000..0d001bf1e4c --- /dev/null +++ b/tests/queries/0_stateless/02946_literal_alias_misclassification.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS literal_alias_misclassification; + +CREATE TABLE literal_alias_misclassification +( + `id` Int64, + `a` Nullable(String), + `b` Nullable(Int64) +) +ENGINE = MergeTree +ORDER BY id; + + +INSERT INTO literal_alias_misclassification values(1, 'a', 1); +INSERT INTO literal_alias_misclassification values(2, 'b', 2); + +SELECT 'const' AS r, b +FROM + ( SELECT a AS r, b FROM literal_alias_misclassification ) AS t1 + LEFT JOIN + ( SELECT a AS r FROM literal_alias_misclassification ) AS t2 + ON t1.r = t2.r +ORDER BY b; + +DROP TABLE IF EXISTS literal_alias_misclassification; diff --git a/tests/queries/0_stateless/02946_parallel_replicas_distributed.sql b/tests/queries/0_stateless/02946_parallel_replicas_distributed.sql index 6c7fbd0f752..1afd4ff0192 100644 --- a/tests/queries/0_stateless/02946_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/02946_parallel_replicas_distributed.sql @@ -11,7 +11,7 @@ ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDat SELECT count(), sum(id) FROM test_d -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1; DROP TABLE test_d; DROP TABLE test; diff --git a/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.reference b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.reference new file mode 100644 index 00000000000..64dfee7b7a1 --- /dev/null +++ b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.reference @@ -0,0 +1,6 @@ +1 750 +2 750 +3 750 +1 750 +2 750 +3 750 diff --git a/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.sql b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.sql new file mode 100644 index 00000000000..d33c8cdbc93 --- /dev/null +++ b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS t1 SYNC; +DROP TABLE IF EXISTS t2 SYNC; +DROP TABLE IF EXISTS t3 SYNC; + +CREATE TABLE t1(k UInt32, v String) ENGINE ReplicatedMergeTree('/02946_parallel_replicas/{database}/test_tbl', 'r1') ORDER BY k; +CREATE TABLE t2(k UInt32, v String) ENGINE ReplicatedMergeTree('/02946_parallel_replicas/{database}/test_tbl', 'r2') ORDER BY k; +CREATE TABLE t3(k UInt32, v String) ENGINE ReplicatedMergeTree('/02946_parallel_replicas/{database}/test_tbl', 'r3') ORDER BY k; + +insert into t1 select number % 4, toString(number) from numbers(1000, 1000); +insert into t2 select number % 4, toString(number) from numbers(2000, 1000); +insert into t3 select number % 4, toString(number) from numbers(3000, 1000); + +system sync replica t1; +system sync replica t2; +system sync replica t3; + +-- w/o parallel replicas +SELECT + k, + count() +FROM t1 +WHERE k > 0 +GROUP BY k +ORDER BY k +SETTINGS force_primary_key = 1, allow_experimental_parallel_reading_from_replicas = 0; + +-- parallel replicas, primary key is used +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, 
cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SELECT + k, + count() +FROM t1 +WHERE k > 0 +GROUP BY k +ORDER BY k +SETTINGS force_primary_key = 1; + +-- parallel replicas, primary key is NOT used +SELECT + k, + count() +FROM t1 +GROUP BY k +ORDER BY k +SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED } + +DROP TABLE t1 SYNC; +DROP TABLE t2 SYNC; +DROP TABLE t3 SYNC; diff --git a/tests/queries/0_stateless/02947_non_post_request_should_be_readonly.reference b/tests/queries/0_stateless/02947_non_post_request_should_be_readonly.reference new file mode 100644 index 00000000000..9cdea62b413 --- /dev/null +++ b/tests/queries/0_stateless/02947_non_post_request_should_be_readonly.reference @@ -0,0 +1,2 @@ +Cannot execute query in readonly mode +Internal Server Error diff --git a/tests/queries/0_stateless/02947_non_post_request_should_be_readonly.sh b/tests/queries/0_stateless/02947_non_post_request_should_be_readonly.sh new file mode 100755 index 00000000000..4250799b522 --- /dev/null +++ b/tests/queries/0_stateless/02947_non_post_request_should_be_readonly.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# This should fail +${CLICKHOUSE_CURL} -X GET -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}&query=CREATE+DATABASE+non_post_request_test" | grep -o "Cannot execute query in readonly mode" + +# This should fail +${CLICKHOUSE_CURL} --head -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}&query=CREATE+DATABASE+non_post_request_test" | grep -o "Internal Server Error" + +# This should pass - but will throw error "non_post_request_test already exists" if the database was created by any of the above requests. 
+${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'CREATE DATABASE non_post_request_test' +${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'DROP DATABASE non_post_request_test' diff --git a/tests/queries/0_stateless/02947_parallel_replicas_remote.reference b/tests/queries/0_stateless/02947_parallel_replicas_remote.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02947_parallel_replicas_remote.sql b/tests/queries/0_stateless/02947_parallel_replicas_remote.sql new file mode 100644 index 00000000000..345d9f9cb03 --- /dev/null +++ b/tests/queries/0_stateless/02947_parallel_replicas_remote.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (id UInt64, date Date) +ENGINE = MergeTree +ORDER BY id +AS select *, '2023-12-25' from numbers(100); + +SELECT count(), sum(id) +FROM remote('127.0.0.1|127.0.0.2|127.0.0.3|127.0.0.4', currentDatabase(), test) +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 4, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree = 1; -- { serverError CLUSTER_DOESNT_EXIST } + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.reference b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql new file mode 100644 index 00000000000..002d696e62f --- /dev/null +++ b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql @@ -0,0 +1,7 @@ +# There was a wrong, harmful feature, leading to bugs and data corruption. +# This feature is removed, but we take care to maintain compatibility on the syntax level, so now it works as a no-op. 
+ +DROP TABLE IF EXISTS t; +CREATE TABLE t (x UInt8, PRIMARY KEY x) ENGINE = ReplacingMergeTree; +OPTIMIZE TABLE t CLEANUP; +DROP TABLE t; diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference new file mode 100644 index 00000000000..4d33751c699 --- /dev/null +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference @@ -0,0 +1,8 @@ +--- +2 test2 8 +3 test3 8 +4 test4 1985 +--- +1 test1 42 +--- +3 test3 diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql new file mode 100644 index 00000000000..53b8a761cda --- /dev/null +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS merge_tree_in_subqueries; +CREATE TABLE merge_tree_in_subqueries (id UInt64, name String, num UInt64) ENGINE = MergeTree ORDER BY (id, name); +INSERT INTO merge_tree_in_subqueries VALUES(1, 'test1', 42); +INSERT INTO merge_tree_in_subqueries VALUES(2, 'test2', 8); +INSERT INTO merge_tree_in_subqueries VALUES(3, 'test3', 8); +INSERT INTO merge_tree_in_subqueries VALUES(4, 'test4', 1985); +INSERT INTO merge_tree_in_subqueries VALUES(5, 'test5', 0); + +SET max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree=1; + +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=1; + +SELECT '---'; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; + +SELECT '---'; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; + +-- IN with tuples is allowed +SELECT '---'; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2; + +DROP TABLE IF EXISTS merge_tree_in_subqueries; diff --git a/tests/queries/0_stateless/02949_parallel_replicas_scalar_subquery_big_integer.reference b/tests/queries/0_stateless/02949_parallel_replicas_scalar_subquery_big_integer.reference new file mode 100644 index 00000000000..97bd2c20556 --- /dev/null +++ b/tests/queries/0_stateless/02949_parallel_replicas_scalar_subquery_big_integer.reference @@ -0,0 +1 @@ +6 111111111111111111111111111111111111111 diff --git a/tests/queries/0_stateless/02949_parallel_replicas_scalar_subquery_big_integer.sql b/tests/queries/0_stateless/02949_parallel_replicas_scalar_subquery_big_integer.sql new file mode 100644 index 00000000000..26f87180ab2 --- /dev/null +++ b/tests/queries/0_stateless/02949_parallel_replicas_scalar_subquery_big_integer.sql @@ -0,0 +1,9 @@ +DROP TABLE IF 
EXISTS test; +CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY x; +INSERT INTO test VALUES (1), (2), (3); + +SET allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree = 1; + +WITH (SELECT '111111111111111111111111111111111111111'::UInt128) AS v SELECT sum(x), max(v) FROM test; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02950_obfuscator_keywords_more.reference b/tests/queries/0_stateless/02950_obfuscator_keywords_more.reference new file mode 100644 index 00000000000..7c3fcea85ea --- /dev/null +++ b/tests/queries/0_stateless/02950_obfuscator_keywords_more.reference @@ -0,0 +1 @@ +CREATE TABLE test (pill DateTime('UTC'), tart DateTime('Europe/Amsterdam')) ENGINE = ReplicatedVersionedCollapsingMergeTree ORDER BY pill SETTINGS index_granularity = 15414; diff --git a/tests/queries/0_stateless/02950_obfuscator_keywords_more.sh b/tests/queries/0_stateless/02950_obfuscator_keywords_more.sh new file mode 100755 index 00000000000..fb0e7c178e2 --- /dev/null +++ b/tests/queries/0_stateless/02950_obfuscator_keywords_more.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +obf="$CLICKHOUSE_FORMAT --obfuscate" + +echo "CREATE TABLE test (secret1 DateTime('UTC'), secret2 DateTime('Europe/Amsterdam')) ENGINE = ReplicatedVersionedCollapsingMergeTree ORDER BY secret1 SETTINGS index_granularity = 8192;" | $obf diff --git a/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference b/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference new file mode 100644 index 00000000000..21b7b527b7a --- /dev/null +++ b/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference @@ -0,0 +1,8 @@ +100 4950 +1 +89 +90 +91 +92 +93 +1 diff --git a/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql b/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql new file mode 100644 index 00000000000..22f55acd365 --- /dev/null +++ b/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (k UInt64, v String) +ENGINE = MergeTree +ORDER BY k; + +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +-- default coordinator +SELECT count(), sum(k) +FROM test +SETTINGS log_comment = '02950_parallel_replicas_used_replicas_count'; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['ParallelReplicasUsedCount'] FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; + +-- In order coordinator +SELECT k FROM test order by k limit 5 offset 89 SETTINGS optimize_read_in_order=1, log_comment='02950_parallel_replicas_used_replicas_count_2'; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['ParallelReplicasUsedCount'] FROM system.query_log WHERE type = 'QueryFinish' AND 
query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count_2' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.reference b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.reference new file mode 100644 index 00000000000..abdcc960be3 --- /dev/null +++ b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.reference @@ -0,0 +1,7 @@ +NewPart part_log_bytes_uncompressed all_1_1_0 1 1 +MergeParts part_log_bytes_uncompressed all_1_2_1 1 1 +MutatePart part_log_bytes_uncompressed all_1_2_1_3 1 1 +NewPart part_log_bytes_uncompressed all_2_2_0 1 1 +NewPart part_log_bytes_uncompressed all_4_4_0 1 1 +RemovePart part_log_bytes_uncompressed all_4_4_0 1 1 +NewPart part_log_bytes_uncompressed all_4_4_1 0 0 diff --git a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql new file mode 100644 index 00000000000..0c2cef6e004 --- /dev/null +++ b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql @@ -0,0 +1,24 @@ +CREATE TABLE part_log_bytes_uncompressed ( + key UInt8, + value UInt8 +) +Engine=MergeTree() +ORDER BY key; + +INSERT INTO part_log_bytes_uncompressed SELECT 1, 1 FROM numbers(1000); +INSERT INTO part_log_bytes_uncompressed SELECT 2, 1 FROM numbers(1000); + +OPTIMIZE TABLE part_log_bytes_uncompressed FINAL; + +ALTER TABLE part_log_bytes_uncompressed UPDATE value = 3 WHERE 1 = 1 SETTINGS mutations_sync=2; + +INSERT INTO part_log_bytes_uncompressed SELECT 3, 1 FROM numbers(1000); +ALTER TABLE part_log_bytes_uncompressed DROP PART 'all_4_4_0' SETTINGS mutations_sync=2; + +SYSTEM FLUSH LOGS; + +SELECT event_type, table, part_name, bytes_uncompressed > 0, size_in_bytes < bytes_uncompressed FROM system.part_log +WHERE event_date >= yesterday() AND database = currentDatabase() AND table = 'part_log_bytes_uncompressed' +ORDER BY part_name, event_type; + +DROP TABLE part_log_bytes_uncompressed; diff --git a/tests/queries/0_stateless/02950_part_offset_as_primary_key.reference b/tests/queries/0_stateless/02950_part_offset_as_primary_key.reference new file mode 100644 index 00000000000..368f8dd9871 --- /dev/null +++ b/tests/queries/0_stateless/02950_part_offset_as_primary_key.reference @@ -0,0 +1,14 @@ +-4 +-3 +-2 +-1 +0 +-3 +0 +-4 +-2 +-1 +0 +10 +40 +400 diff --git a/tests/queries/0_stateless/02950_part_offset_as_primary_key.sql b/tests/queries/0_stateless/02950_part_offset_as_primary_key.sql new file mode 100644 index 00000000000..736d54023ce --- /dev/null +++ b/tests/queries/0_stateless/02950_part_offset_as_primary_key.sql @@ -0,0 +1,40 @@ +drop table if exists a; + +create table a (i int) engine MergeTree order by i settings index_granularity = 2; +insert into a select -number from numbers(5); + +-- nothing to read +select i from a where _part_offset >= 5 order by i settings max_bytes_to_read = 1; + +-- one granule +select i from a where _part_offset = 0 order by i settings max_rows_to_read = 2; +select i from a where _part_offset = 1 order by i settings max_rows_to_read = 2; +select i from a where _part_offset = 2 order by i settings max_rows_to_read = 2; +select i from a where _part_offset = 3 order by i settings max_rows_to_read = 2; +select i from a where _part_offset = 4 order by i settings max_rows_to_read = 1; + +-- other predicates 
+select i from a where _part_offset in (1, 4) order by i settings max_rows_to_read = 3; +select i from a where _part_offset not in (1, 4) order by i settings max_rows_to_read = 4; + +-- the force_primary_key check still works +select i from a where _part_offset = 4 order by i settings force_primary_key = 1; -- { serverError INDEX_NOT_USED } + +-- combining with other primary keys doesn't work (makes no sense) +select i from a where i = -3 or _part_offset = 4 order by i settings force_primary_key = 1; -- { serverError INDEX_NOT_USED } + +drop table a; + +drop table if exists b; + +create table b (i int) engine MergeTree order by tuple() settings index_granularity = 2; + +-- all_1_1_0 +insert into b select number * 10 from numbers(5); +-- all_2_2_0 +insert into b select number * 100 from numbers(5); + +-- multiple parts with _part predicate +select i from b where (_part = 'all_1_1_0' and _part_offset in (1, 4)) or (_part = 'all_2_2_0' and _part_offset in (0, 4)) order by i settings max_rows_to_read = 6; + +drop table b; diff --git a/tests/queries/0_stateless/02951_data.jsonl.zst b/tests/queries/0_stateless/02951_data.jsonl.zst new file mode 100644 index 00000000000..9701cdd5f6e Binary files /dev/null and b/tests/queries/0_stateless/02951_data.jsonl.zst differ diff --git a/tests/queries/0_stateless/02951_parallel_parsing_json_compact_each_row.reference b/tests/queries/0_stateless/02951_parallel_parsing_json_compact_each_row.reference new file mode 100644 index 00000000000..0953b633db6 --- /dev/null +++ b/tests/queries/0_stateless/02951_parallel_parsing_json_compact_each_row.reference @@ -0,0 +1 @@ +15021837090950060251 diff --git a/tests/queries/0_stateless/02951_parallel_parsing_json_compact_each_row.sh b/tests/queries/0_stateless/02951_parallel_parsing_json_compact_each_row.sh new file mode 100755 index 00000000000..bdaac0e0c50 --- /dev/null +++ b/tests/queries/0_stateless/02951_parallel_parsing_json_compact_each_row.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --input-format-parallel-parsing 1 --query " + SELECT sum(cityHash64(*)) FROM file('$CUR_DIR/02951_data.jsonl.zst', JSONCompactEachRow, ' + time_offset Decimal64(3), + lat Float64, + lon Float64, + altitude String, + ground_speed Float32, + track_degrees Float32, + flags UInt32, + vertical_rate Int32, + aircraft Tuple( + alert Int64, + alt_geom Int64, + gva Int64, + nac_p Int64, + nac_v Int64, + nic Int64, + nic_baro Int64, + rc Int64, + sda Int64, + sil Int64, + sil_type String, + spi Int64, + track Float64, + type String, + version Int64, + category String, + emergency String, + flight String, + squawk String, + baro_rate Int64, + nav_altitude_fms Int64, + nav_altitude_mcp Int64, + nav_modes Array(String), + nav_qnh Float64, + geom_rate Int64, + ias Int64, + mach Float64, + mag_heading Float64, + oat Int64, + roll Float64, + tas Int64, + tat Int64, + true_heading Float64, + wd Int64, + ws Int64, + track_rate Float64, + nav_heading Float64 + ), + source LowCardinality(String), + geometric_altitude Int32, + geometric_vertical_rate Int32, + indicated_airspeed Int32, + roll_angle Float32, + hex String + ')" diff --git a/tests/queries/0_stateless/02952_archive_parsing.reference b/tests/queries/0_stateless/02952_archive_parsing.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02952_archive_parsing.sql b/tests/queries/0_stateless/02952_archive_parsing.sql new file mode 100644 index 00000000000..49b0223e6ec --- /dev/null +++ b/tests/queries/0_stateless/02952_archive_parsing.sql @@ -0,0 +1 @@ +SELECT * FROM file('::a'); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02952_binary.reference b/tests/queries/0_stateless/02952_binary.reference new file mode 100644 index 00000000000..8205460df96 --- /dev/null +++ b/tests/queries/0_stateless/02952_binary.reference @@ -0,0 +1 @@ +addressToSymbol diff --git a/tests/queries/0_stateless/02952_binary.sh b/tests/queries/0_stateless/02952_binary.sh new file mode 100755 index 00000000000..c55df1a80b1 --- /dev/null +++ b/tests/queries/0_stateless/02952_binary.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -s "${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/binary" 2>/dev/null | grep -oF --max-count 1 'addressToSymbol' diff --git a/tests/queries/0_stateless/02952_clickhouse_local_query_parameters_cli.reference b/tests/queries/0_stateless/02952_clickhouse_local_query_parameters_cli.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02952_clickhouse_local_query_parameters_cli.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02952_clickhouse_local_query_parameters_cli.sh b/tests/queries/0_stateless/02952_clickhouse_local_query_parameters_cli.sh new file mode 100755 index 00000000000..5e9efbbf3ad --- /dev/null +++ b/tests/queries/0_stateless/02952_clickhouse_local_query_parameters_cli.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --param_x 1 -q "SELECT {x:UInt64}, {x:String};" diff --git a/tests/queries/0_stateless/02953_slow_create_view.reference b/tests/queries/0_stateless/02953_slow_create_view.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02953_slow_create_view.sql b/tests/queries/0_stateless/02953_slow_create_view.sql new file mode 100644 index 00000000000..7824bd97b92 --- /dev/null +++ b/tests/queries/0_stateless/02953_slow_create_view.sql @@ -0,0 +1,44 @@ +drop view if exists slow_view1; + +create view slow_view1 as +with c1 as (select 1 as a), + c2 as (select a from c1), + c3 as (select a from c2), + c4 as (select a from c3), + c5 as (select a from c4), + c6 as (select a from c5), + c7 as (select a from c6), + c8 as (select a from c7), + c9 as (select a from c8), + c10 as (select a from c9), + c11 as (select a from c10), + c12 as (select a from c11), + c13 as (select a from c12), + c14 as (select a from c13), + c15 as (select a from c14), + c16 as (select a from c15), + c17 as (select a from c16), + c18 as (select a from c17), + c19 as (select a from c18), + c20 as (select a from c19), + c21 as (select a from c20), + c22 as (select a from c21), + c23 as (select a from c22), + c24 as (select a from c23), + c25 as (select a from c24), + c26 as (select a from c25), + c27 as (select a from c26), + c28 as (select a from c27), + c29 as (select a from c28), + c30 as (select a from c29), + c31 as (select a from c30), + c32 as (select a from c31), + c33 as (select a from c32), + c34 as (select a from c33), + c35 as (select a from c34), + c36 as (select a from c35), + c37 as (select a from c36), + c38 as (select a from c37), + c39 as (select a from c38), + c40 as (select a from c39) +select a from c21; diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql index c826a129b2a..157d5892ad8 100644 --- a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql @@ -1,5 +1,6 @@ SET compile_aggregate_expressions = 1; SET min_count_to_compile_aggregate_expression = 0; +SET max_bytes_before_external_group_by='200M'; -- might be randomized to 1 leading to timeout SELECT 'Aggregation using JIT compilation'; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 637ab0ce6d4..35afb8185fb 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -502,6 +502,7 @@ Memcheck MemoryCode MemoryDataAndStack MemoryResident +MemoryResidentMax MemorySanitizer MemoryShared MemoryTracking @@ -711,6 +712,7 @@ Promtail Protobuf ProtobufSingle ProxySQL +Punycode PyArrow PyCharm QEMU @@ -1850,6 +1852,8 @@ mininum miniselect minmap minmax +minSampleSizeContinuous +minSampleSizeConversion mins misconfiguration mispredictions @@ -2069,6 +2073,8 @@ pseudorandom pseudorandomize psql ptrs +punycodeDecode +punycodeEncode pushdown pwrite py diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f319f57e0b9..de3accea617 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.11.3.23-stable 2023-12-21 v23.11.2.11-stable 2023-12-13 v23.11.1.2711-stable 2023-12-06 v23.10.5.20-stable 2023-11-25