diff --git a/.gitmodules b/.gitmodules index 53ef899dd99..68016bf8c5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -245,6 +245,12 @@ [submodule "contrib/idxd-config"] path = contrib/idxd-config url = https://github.com/intel/idxd-config +[submodule "contrib/QAT-ZSTD-Plugin"] + path = contrib/QAT-ZSTD-Plugin + url = https://github.com/intel/QAT-ZSTD-Plugin +[submodule "contrib/qatlib"] + path = contrib/qatlib + url = https://github.com/intel/qatlib [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash @@ -360,3 +366,6 @@ [submodule "contrib/sqids-cpp"] path = contrib/sqids-cpp url = https://github.com/sqids/sqids-cpp.git +[submodule "contrib/idna"] + path = contrib/idna + url = https://github.com/ada-url/idna.git diff --git a/README.md b/README.md index c56b3c2fd0d..d356e429892 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ curl https://clickhouse.com/ | sh ## Upcoming Events -Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. +Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" diff --git a/base/base/Decimal.h b/base/base/Decimal.h index 2405ba9ca0d..afa186faf5b 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -99,7 +99,7 @@ public: }; } -constexpr DB::UInt64 max_uint_mask = std::numeric_limits::max(); +constexpr UInt64 max_uint_mask = std::numeric_limits::max(); namespace std { @@ -114,8 +114,8 @@ namespace std { size_t operator()(const DB::Decimal128 & x) const { - return std::hash()(x.value >> 64) - ^ std::hash()(x.value & max_uint_mask); + return std::hash()(x.value >> 64) + ^ std::hash()(x.value & max_uint_mask); } }; @@ -134,8 +134,8 @@ namespace std size_t operator()(const DB::Decimal256 & x) const { // FIXME temp solution - return std::hash()(static_cast(x.value >> 64 & max_uint_mask)) - ^ std::hash()(static_cast(x.value & max_uint_mask)); + return std::hash()(static_cast(x.value >> 64 & max_uint_mask)) + ^ std::hash()(static_cast(x.value & max_uint_mask)); } }; } diff --git a/base/base/types.h b/base/base/types.h index 5825c8ae7ad..3a7760eae91 100644 --- a/base/base/types.h +++ b/base/base/types.h @@ -3,15 +3,6 @@ #include #include -using Int8 = int8_t; -using Int16 = int16_t; -using Int32 = int32_t; -using Int64 = int64_t; - -#ifndef __cpp_char8_t -using char8_t = unsigned char; -#endif - /// This is needed for more strict aliasing. 
https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713 using UInt8 = char8_t; @@ -19,24 +10,12 @@ using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; -using String = std::string; - -namespace DB -{ - -using UInt8 = ::UInt8; -using UInt16 = ::UInt16; -using UInt32 = ::UInt32; -using UInt64 = ::UInt64; - -using Int8 = ::Int8; -using Int16 = ::Int16; -using Int32 = ::Int32; -using Int64 = ::Int64; +using Int8 = int8_t; +using Int16 = int16_t; +using Int32 = int32_t; +using Int64 = int64_t; using Float32 = float; using Float64 = double; using String = std::string; - -} diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 3f7a8498059..3882b51227e 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -82,3 +82,4 @@ if (SANITIZE_COVERAGE) endif() set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") +set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1b5ba15187f..c6d1dcb41e6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx) add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) +add_contrib (idna-cmake idna) add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (incbin-cmake incbin) add_contrib (sqids-cpp-cmake sqids-cpp) @@ -171,9 +172,9 @@ add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42) - option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES}) + option (ENABLE_QPL "Enable Intel® Query Processing Library (QPL)" ${ENABLE_LIBRARIES}) elseif(ENABLE_QPL) - message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with SSE 4.2 or higher") + message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 with SSE 4.2 or higher") endif() if (ENABLE_QPL) add_contrib (idxd-config-cmake idxd-config) @@ -182,6 +183,28 @@ else() message(STATUS "Not using QPL") endif () +if (OS_LINUX AND ARCH_AMD64) + option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES}) +elseif(ENABLE_QATLIB) + message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64") +endif() +if (ENABLE_QATLIB) + option (ENABLE_QAT_USDM_DRIVER "A User Space DMA-able Memory (USDM) component which allocates/frees DMA-able memory" OFF) + option (ENABLE_QAT_OUT_OF_TREE_BUILD "Using out-of-tree driver, user needs to customize ICP_ROOT variable" OFF) + set(ICP_ROOT "" CACHE STRING "ICP_ROOT variable to define the path of out-of-tree driver package") + if (ENABLE_QAT_OUT_OF_TREE_BUILD) + if (ICP_ROOT STREQUAL "") + message(FATAL_ERROR "Please define the path of out-of-tree driver package with -DICP_ROOT=xxx or disable out-of-tree build with -DENABLE_QAT_OUT_OF_TREE_BUILD=OFF; \ + If you want out-of-tree build but have no package available, please download and build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html") + endif () + else() + add_contrib (qatlib-cmake qatlib) # requires: isa-l + endif () + add_contrib (QAT-ZSTD-Plugin-cmake QAT-ZSTD-Plugin) +else() + message(STATUS "Not using QATLib") +endif () + add_contrib (morton-nd-cmake morton-nd) if (ARCH_S390X) add_contrib(crc32-s390x-cmake crc32-s390x) diff --git 
a/contrib/NuRaft b/contrib/NuRaft
index 2f5f52c4d8c..1278e32bb0d 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 2f5f52c4d8c87c2a3a3d101ca3a0194c9b77526f
+Subproject commit 1278e32bb0d5dc489f947e002bdf8c71b0ddaa63
diff --git a/contrib/QAT-ZSTD-Plugin b/contrib/QAT-ZSTD-Plugin
new file mode 160000
index 00000000000..e5a134e12d2
--- /dev/null
+++ b/contrib/QAT-ZSTD-Plugin
@@ -0,0 +1 @@
+Subproject commit e5a134e12d2ea8a5b0f3b83c5b1c325fda4eb0a8
diff --git a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..72d21a8572b
--- /dev/null
+++ b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt
@@ -0,0 +1,85 @@
+# Intel® QuickAssist Technology ZSTD Plugin (QAT ZSTD Plugin) is a plugin for Zstandard* (ZSTD*) that accelerates compression with QAT.
+# ENABLE_QAT_OUT_OF_TREE_BUILD = 1 means the kernel has no native QAT support; the user builds and installs the driver from the external package: https://www.intel.com/content/www/us/en/download/765501.html
+# In this case the user must also set the ICP_ROOT environment variable to point to the root directory of the QAT driver source tree.
+# ENABLE_QAT_OUT_OF_TREE_BUILD = 0 means the kernel has a built-in QAT driver, so QAT-ZSTD-Plugin only depends on qatlib.
+
+if (ENABLE_QAT_OUT_OF_TREE_BUILD)
+    message(STATUS "Intel QATZSTD out-of-tree build, ICP_ROOT:${ICP_ROOT}")
+
+    set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src")
+    set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c")
+    set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
+    set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include")
+    set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc")
+    set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include")
+    set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv")
+    set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so")
+    set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so")
+    if (ENABLE_QAT_USDM_DRIVER)
+        add_definitions(-DENABLE_USDM_DRV)
+    endif()
+    add_library(_qatzstd_plugin ${QATZSTD_SRC})
+    target_link_libraries (_qatzstd_plugin PUBLIC ${USDM_LIBRARY} ${QAT_S_LIBRARY})
+    target_include_directories(_qatzstd_plugin
+        SYSTEM PUBLIC "${QATZSTD_SRC_DIR}"
+        PRIVATE ${QAT_INCLUDE_DIR}
+        ${QAT_DC_INCLUDE_DIR}
+        ${QAT_AL_INCLUDE_DIR}
+        ${QAT_USDM_INCLUDE_DIR}
+        ${ZSTD_LIBRARY_DIR})
+    target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC)
+    add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin)
+else () # In-tree build
+    message(STATUS "Intel QATZSTD in-tree build")
+    set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src")
+    set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c")
+    set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
+
+    # Please download & build the ICP package from: https://www.intel.com/content/www/us/en/download/765501.html
+    set(ICP_ROOT "${ClickHouse_SOURCE_DIR}/contrib/qatlib")
+    set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include")
+    set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc")
+    set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include")
+    set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv")
+    set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so")
+    set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so")
+    set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib")
+    set(LIBQAT_HEADER_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
+
+    file(MAKE_DIRECTORY
"${LIBQAT_HEADER_DIR}/qat" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/cpa.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/dc/cpa_dc.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_poll.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_user.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv/qae_mem.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + + if (ENABLE_QAT_USDM_DRIVER) + add_definitions(-DENABLE_USDM_DRV) + endif() + + add_library(_qatzstd_plugin ${QATZSTD_SRC}) + target_link_libraries (_qatzstd_plugin PUBLIC ch_contrib::qatlib ch_contrib::usdm) + target_include_directories(_qatzstd_plugin PRIVATE + ${QAT_INCLUDE_DIR} + ${QAT_DC_INCLUDE_DIR} + ${QAT_AL_INCLUDE_DIR} + ${QAT_USDM_INCLUDE_DIR} + ${ZSTD_LIBRARY_DIR} + ${LIBQAT_HEADER_DIR}) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE) + target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) + add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) +endif () + diff --git a/contrib/azure b/contrib/azure index 060c54dfb0a..e71395e44f3 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3 +Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2 diff --git a/contrib/idna b/contrib/idna new file mode 160000 index 00000000000..3c8be01d42b --- /dev/null +++ b/contrib/idna @@ -0,0 +1 @@ +Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667 diff --git a/contrib/idna-cmake/CMakeLists.txt b/contrib/idna-cmake/CMakeLists.txt new file mode 100644 index 00000000000..1138b836192 --- /dev/null +++ b/contrib/idna-cmake/CMakeLists.txt @@ -0,0 +1,24 @@ +option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES}) +if ((NOT ENABLE_IDNA)) + message (STATUS "Not using idna") + return() +endif() +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna") + +set (SRCS + "${LIBRARY_DIR}/src/idna.cpp" + "${LIBRARY_DIR}/src/mapping.cpp" + "${LIBRARY_DIR}/src/mapping_tables.cpp" + "${LIBRARY_DIR}/src/normalization.cpp" + "${LIBRARY_DIR}/src/normalization_tables.cpp" + "${LIBRARY_DIR}/src/punycode.cpp" + "${LIBRARY_DIR}/src/to_ascii.cpp" + "${LIBRARY_DIR}/src/to_unicode.cpp" + "${LIBRARY_DIR}/src/unicode_transcoding.cpp" + "${LIBRARY_DIR}/src/validity.cpp" +) + +add_library (_idna ${SRCS}) +target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include") + +add_library (ch_contrib::idna ALIAS _idna) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 15e965ed841..f85a38dcf8a 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -161,6 +161,9 @@ target_include_directories(_jemalloc SYSTEM PRIVATE target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) +# Because our coverage callbacks call malloc, and recursive call of malloc could not work. 
+target_compile_options(_jemalloc PRIVATE ${WITHOUT_COVERAGE_FLAGS_LIST}) + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_DEBUG=1 diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index c77d5d8319e..60c9d6c4d90 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -33,7 +33,6 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/optional.cpp" "${LIBCXX_SOURCE_DIR}/src/random.cpp" "${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp" -"${LIBCXX_SOURCE_DIR}/src/regex.cpp" "${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp" "${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp" "${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp" diff --git a/contrib/llvm-project b/contrib/llvm-project index 1834e42289c..2568a7cd129 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec +Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856 diff --git a/contrib/qatlib b/contrib/qatlib new file mode 160000 index 00000000000..abe15d7bfc0 --- /dev/null +++ b/contrib/qatlib @@ -0,0 +1 @@ +Subproject commit abe15d7bfc083117bfbb4baee0b49ffcd1c03c5c diff --git a/contrib/qatlib-cmake/CMakeLists.txt b/contrib/qatlib-cmake/CMakeLists.txt new file mode 100644 index 00000000000..d599775035a --- /dev/null +++ b/contrib/qatlib-cmake/CMakeLists.txt @@ -0,0 +1,213 @@ +# Intel® QuickAssist Technology Library (QATlib). + +message(STATUS "Intel QATlib ON") +set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib") +set(LIBQAT_DIR "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src") +set(LIBOSAL_DIR "${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src") +set(OPENSSL_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl") + +# Build 3 libraries: _qatmgr, _osal, _qatlib +# Produce ch_contrib::qatlib by linking these libraries. 
+ +# _qatmgr + +SET(LIBQATMGR_sources ${LIBQAT_DIR}/qat_direct/vfio/qat_mgr_client.c + ${LIBQAT_DIR}/qat_direct/vfio/qat_mgr_lib.c + ${LIBQAT_DIR}/qat_direct/vfio/qat_log.c + ${LIBQAT_DIR}/qat_direct/vfio/vfio_lib.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_pfvf_proto.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_pfvf_vf_msg.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_pf.c) + +add_library(_qatmgr ${LIBQATMGR_sources}) + +target_include_directories(_qatmgr PRIVATE + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/vfio + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include + ${LIBQAT_ROOT_DIR}/quickassist/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include + ${LIBQAT_ROOT_DIR}/quickassist/qat/drivers/crypto/qat/qat_common + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/common/include + ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include) + +target_compile_definitions(_qatmgr PRIVATE -DUSER_SPACE) +target_compile_options(_qatmgr PRIVATE -Wno-error=int-conversion) + +# _osal + +SET(LIBOSAL_sources + ${LIBOSAL_DIR}/linux/user_space/OsalSemaphore.c + ${LIBOSAL_DIR}/linux/user_space/OsalThread.c + ${LIBOSAL_DIR}/linux/user_space/OsalMutex.c + ${LIBOSAL_DIR}/linux/user_space/OsalSpinLock.c + ${LIBOSAL_DIR}/linux/user_space/OsalAtomic.c + ${LIBOSAL_DIR}/linux/user_space/OsalServices.c + ${LIBOSAL_DIR}/linux/user_space/OsalUsrKrnProxy.c + ${LIBOSAL_DIR}/linux/user_space/OsalCryptoInterface.c) + +add_library(_osal ${LIBOSAL_sources}) + +target_include_directories(_osal PRIVATE + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include + ${OPENSSL_DIR}/include + ${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_x86_64/include + ${ClickHouse_SOURCE_DIR}/contrib/qatlib-cmake/include) + +target_compile_definitions(_osal PRIVATE -DOSAL_ENSURE_ON -DUSE_OPENSSL) + +# _qatlib +SET(LIBQAT_sources + ${LIBQAT_DIR}/common/compression/dc_buffers.c + ${LIBQAT_DIR}/common/compression/dc_chain.c + ${LIBQAT_DIR}/common/compression/dc_datapath.c + ${LIBQAT_DIR}/common/compression/dc_dp.c + ${LIBQAT_DIR}/common/compression/dc_header_footer.c + ${LIBQAT_DIR}/common/compression/dc_header_footer_lz4.c + ${LIBQAT_DIR}/common/compression/dc_session.c + ${LIBQAT_DIR}/common/compression/dc_stats.c + ${LIBQAT_DIR}/common/compression/dc_err_sim.c + ${LIBQAT_DIR}/common/compression/dc_ns_datapath.c + ${LIBQAT_DIR}/common/compression/dc_ns_header_footer.c + ${LIBQAT_DIR}/common/compression/dc_crc32.c + ${LIBQAT_DIR}/common/compression/dc_crc64.c + ${LIBQAT_DIR}/common/compression/dc_xxhash32.c + ${LIBQAT_DIR}/common/compression/icp_sal_dc_err_sim.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_control_path.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_data_path.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_stats.c + ${LIBQAT_DIR}/common/crypto/asym/dsa/lac_dsa.c + ${LIBQAT_DIR}/common/crypto/asym/dsa/lac_dsa_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_common.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_montedwds.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_nist_curves.c + 
${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecdh.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecdsa.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecsm2.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_kpt_ecdsa.c + ${LIBQAT_DIR}/common/crypto/asym/large_number/lac_ln.c + ${LIBQAT_DIR}/common/crypto/asym/large_number/lac_ln_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_mmp.c + ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_qat_comms.c + ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_utils.c + ${LIBQAT_DIR}/common/crypto/asym/prime/lac_prime.c + ${LIBQAT_DIR}/common/crypto/asym/prime/lac_prime_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_control_path.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_decrypt.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_encrypt.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_keygen.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_stats.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_kpt_rsa_decrypt.c + ${LIBQAT_DIR}/common/crypto/sym/drbg/lac_sym_drbg_api.c + ${LIBQAT_DIR}/common/crypto/sym/key/lac_sym_key.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_alg_chain.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_api.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_auth_enc.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_cb.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_cipher.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_compile_check.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_dp.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_hash.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_partial.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_queue.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_stats.c + ${LIBQAT_DIR}/common/crypto/sym/nrbg/lac_sym_nrbg_api.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_cipher.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_constants_table.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_hash.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_hash_defs_lookup.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_key.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_hash_sw_precomputes.c + ${LIBQAT_DIR}/common/crypto/kpt/provision/lac_kpt_provision.c + ${LIBQAT_DIR}/common/ctrl/sal_compression.c + ${LIBQAT_DIR}/common/ctrl/sal_create_services.c + ${LIBQAT_DIR}/common/ctrl/sal_ctrl_services.c + ${LIBQAT_DIR}/common/ctrl/sal_list.c + ${LIBQAT_DIR}/common/ctrl/sal_crypto.c + ${LIBQAT_DIR}/common/ctrl/sal_dc_chain.c + ${LIBQAT_DIR}/common/ctrl/sal_instances.c + ${LIBQAT_DIR}/common/qat_comms/sal_qat_cmn_msg.c + ${LIBQAT_DIR}/common/utils/lac_buffer_desc.c + ${LIBQAT_DIR}/common/utils/lac_log_message.c + ${LIBQAT_DIR}/common/utils/lac_mem.c + ${LIBQAT_DIR}/common/utils/lac_mem_pools.c + ${LIBQAT_DIR}/common/utils/lac_sw_responses.c + ${LIBQAT_DIR}/common/utils/lac_sync.c + ${LIBQAT_DIR}/common/utils/sal_service_state.c + ${LIBQAT_DIR}/common/utils/sal_statistics.c + ${LIBQAT_DIR}/common/utils/sal_misc_error_stats.c + ${LIBQAT_DIR}/common/utils/sal_string_parse.c + ${LIBQAT_DIR}/common/utils/sal_user_process.c + ${LIBQAT_DIR}/common/utils/sal_versions.c + ${LIBQAT_DIR}/common/device/sal_dev_info.c + ${LIBQAT_DIR}/user/sal_user.c + ${LIBQAT_DIR}/user/sal_user_dyn_instance.c + ${LIBQAT_DIR}/qat_direct/common/adf_process_proxy.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_cfg.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_device.c + 
${LIBQAT_DIR}/qat_direct/common/adf_user_dyn.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_ETring_mgr_dp.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_init.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_ring.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_transport_ctrl.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_cfg.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_ring.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_user_bundles.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_user_proxy.c + ${LIBQAT_DIR}/common/compression/dc_crc_base.c) + +add_library(_qatlib ${LIBQAT_sources}) + +target_include_directories(_qatlib PRIVATE + ${CMAKE_SYSROOT}/usr/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include + ${LIBOSAL_DIR}/linux/user_space/include + ${LIBQAT_ROOT_DIR}/quickassist/include + ${LIBQAT_ROOT_DIR}/quickassist/include/lac + ${LIBQAT_ROOT_DIR}/quickassist/include/dc + ${LIBQAT_ROOT_DIR}/quickassist/qat/drivers/crypto/qat/qat_common + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/compression/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/crypto/sym/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/crypto/asym/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/firmware/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/common/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/vfio + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include + ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include) + +target_link_libraries(_qatlib PRIVATE _qatmgr _osal OpenSSL::SSL ch_contrib::isal) +target_compile_definitions(_qatlib PRIVATE -DUSER_SPACE -DLAC_BYTE_ORDER=__LITTLE_ENDIAN -DOSAL_ENSURE_ON) +target_link_options(_qatlib PRIVATE -pie -z relro -z now -z noexecstack) +target_compile_options(_qatlib PRIVATE -march=native) +add_library (ch_contrib::qatlib ALIAS _qatlib) + +# _usdm + +set(LIBUSDM_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib/quickassist/utilities/libusdm_drv") +set(LIBUSDM_sources + ${LIBUSDM_DIR}/user_space/vfio/qae_mem_utils_vfio.c + ${LIBUSDM_DIR}/user_space/qae_mem_utils_common.c + ${LIBUSDM_DIR}/user_space/vfio/qae_mem_hugepage_utils_vfio.c) + +add_library(_usdm ${LIBUSDM_sources}) + +target_include_directories(_usdm PRIVATE + ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include + ${LIBUSDM_DIR} + ${LIBUSDM_DIR}/include + ${LIBUSDM_DIR}/user_space) + +add_library (ch_contrib::usdm ALIAS _usdm) diff --git a/contrib/qatlib-cmake/include/mqueue.h b/contrib/qatlib-cmake/include/mqueue.h new file mode 100644 index 00000000000..7b1125074a8 --- /dev/null +++ b/contrib/qatlib-cmake/include/mqueue.h @@ -0,0 +1,14 @@ +/* This is a workaround for a build conflict issue +1. __GLIBC_PREREQ (referenced in OsalServices.c) is only defined in './sysroot/linux-x86_64/include/features.h' +2. mqueue.h only exist under './sysroot/linux-x86_64-musl/' +This cause target_include_directories for _osal has a conflict between './sysroot/linux-x86_64/include' and './sysroot/linux-x86_64-musl/' +hence create mqueue.h separately under ./qatlib-cmake/include as an alternative. 
+*/ + +/* Major and minor version number of the GNU C library package. Use + these macros to test for features in specific releases. */ +#define __GLIBC__ 2 +#define __GLIBC_MINOR__ 27 + +#define __GLIBC_PREREQ(maj, min) \ + ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min)) diff --git a/contrib/rocksdb b/contrib/rocksdb index 66e3cbec314..dead55e60b8 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 66e3cbec31400ed3a23deb878c5d7f56f990f0ae +Subproject commit dead55e60b873d5f70f0e9458fbbba2b2180f430 diff --git a/contrib/sqids-cpp b/contrib/sqids-cpp index 3756e537d4d..a471f53672e 160000 --- a/contrib/sqids-cpp +++ b/contrib/sqids-cpp @@ -1 +1 @@ -Subproject commit 3756e537d4d48cc0dd4176801fe19f99601439b0 +Subproject commit a471f53672e98d49223f598528a533b07b085c61 diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index d94ffb893e1..b9c7ea34a36 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -41,6 +41,10 @@ readarray -t DISKS_PATHS < <(clickhouse extract-from-config --config-file "$CLIC readarray -t DISKS_METADATA_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.metadata_path' || true) CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" +CLICKHOUSE_PASSWORD_FILE="${CLICKHOUSE_PASSWORD_FILE:-}" +if [[ -n "${CLICKHOUSE_PASSWORD_FILE}" && -f "${CLICKHOUSE_PASSWORD_FILE}" ]]; then + CLICKHOUSE_PASSWORD="$(cat "${CLICKHOUSE_PASSWORD_FILE}")" +fi CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 8aeb06ec27b..050d4b68628 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -242,7 +242,7 @@ quit --create-query-fuzzer-runs=50 \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ - > >(tail -n 100000 > fuzzer.log) \ + > fuzzer.log \ 2>&1 & fuzzer_pid=$! echo "Fuzzer pid is $fuzzer_pid" @@ -390,6 +390,7 @@ rg --text -F '' server.log > fatal.log ||: dmesg -T > dmesg.log ||: zstd --threads=0 server.log +zstd --threads=0 fuzzer.log cat > report.html < @@ -413,7 +414,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s

AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}

desc->type == ProjectionDescription::Type::Aggregate) - { - query_info.projection->aggregate_overflow_row = aggregate_overflow_row; - query_info.projection->aggregate_final = aggregate_final; - } - if (options.only_analyze) { auto read_nothing = std::make_unique(source_header); @@ -1549,11 +1534,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

{}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); } - if (query_info.projection && query_info.projection->input_order_info && query_info.input_order_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "InputOrderInfo is set for projection and for query"); InputOrderInfoPtr input_order_info_for_order; if (!expressions.need_aggregate) - input_order_info_for_order = query_info.projection ? query_info.projection->input_order_info : query_info.input_order_info; + input_order_info_for_order = query_info.input_order_info; if (options.to_stage > QueryProcessingStage::FetchColumns) { @@ -1614,7 +1597,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(),
@@ -1788,7 +1771,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(source_header)); - PrewhereInfoPtr prewhere_info_ptr = query_info.projection ? query_info.projection->prewhere_info : query_info.prewhere_info; - if (prewhere_info_ptr) + if (query_info.prewhere_info) { - auto & prewhere_info = *prewhere_info_ptr; + auto & prewhere_info = *query_info.prewhere_info; if (prewhere_info.row_level_filter) { @@ -2087,50 +2068,6 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan( auto read_from_pipe = std::make_unique(std::move(pipe)); read_from_pipe->setStepDescription("Read from NullSource"); query_plan.addStep(std::move(read_from_pipe)); - - if (query_info.projection) - { - if (query_info.projection->before_where) - { - auto where_step = std::make_unique( - query_plan.getCurrentDataStream(), - query_info.projection->before_where, - query_info.projection->where_column_name, - query_info.projection->remove_where_filter); - - where_step->setStepDescription("WHERE"); - query_plan.addStep(std::move(where_step)); - } - - if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) - { - if (query_info.projection->before_aggregation) - { - auto expression_before_aggregation - = std::make_unique(query_plan.getCurrentDataStream(), query_info.projection->before_aggregation); - expression_before_aggregation->setStepDescription("Before GROUP BY"); - query_plan.addStep(std::move(expression_before_aggregation)); - } - - // Let's just choose the safe option since we don't know the value of `to_stage` here. - const bool should_produce_results_in_order_of_bucket_number = true; - - // It is used to determine if we should use memory bound merging strategy. Maybe it makes sense for projections, but so far this case is just left untouched. - SortDescription group_by_sort_description; - - executeMergeAggregatedImpl( - query_plan, - query_info.projection->aggregate_overflow_row, - query_info.projection->aggregate_final, - false, - false, - context_->getSettingsRef(), - query_info.projection->aggregation_keys, - query_info.projection->aggregate_descriptions, - should_produce_results_in_order_of_bucket_number, - std::move(group_by_sort_description)); - } - } } RowPolicyFilterPtr InterpreterSelectQuery::getRowPolicyFilter() const @@ -2574,80 +2511,51 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Create optimizer with prepared actions. /// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge. - if ((optimize_read_in_order || optimize_aggregation_in_order) - && (!query_info.projection || query_info.projection->complete)) + if (optimize_read_in_order) { - if (optimize_read_in_order) - { - if (query_info.projection) - { - query_info.projection->order_optimizer = std::make_shared( - // TODO Do we need a projection variant for this field? 
- query, - analysis_result.order_by_elements_actions, - getSortDescription(query, context), - query_info.syntax_analyzer_result); - } - else - { - query_info.order_optimizer = std::make_shared( - query, - analysis_result.order_by_elements_actions, - getSortDescription(query, context), - query_info.syntax_analyzer_result); - } - } - else if (optimize_aggregation_in_order) - { - if (query_info.projection) - { - query_info.projection->order_optimizer = std::make_shared( - query, - query_info.projection->group_by_elements_actions, - query_info.projection->group_by_elements_order_descr, - query_info.syntax_analyzer_result); - } - else - { - query_info.order_optimizer = std::make_shared( - query, - analysis_result.group_by_elements_actions, - getSortDescriptionFromGroupBy(query), - query_info.syntax_analyzer_result); - } - } + query_info.order_optimizer = std::make_shared( + query, + analysis_result.order_by_elements_actions, + getSortDescription(query, context), + query_info.syntax_analyzer_result); /// If we don't have filtration, we can pushdown limit to reading stage for optimizations. - UInt64 limit = (query.hasFiltration() || query.groupBy()) ? 0 : getLimitForSorting(query, context); - if (query_info.projection) - query_info.projection->input_order_info - = query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context, limit); - else - query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit); + UInt64 limit = query.hasFiltration() ? 0 : getLimitForSorting(query, context); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit); + } + else if (optimize_aggregation_in_order) + { + query_info.order_optimizer = std::make_shared( + query, + analysis_result.group_by_elements_actions, + getSortDescriptionFromGroupBy(query), + query_info.syntax_analyzer_result); + + query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, /*limit=*/ 0); } query_info.storage_limits = std::make_shared(storage_limits); - query_info.settings_limit_offset_done = options.settings_limit_offset_done; + /// Possible filters: row-security, additional filter, replica filter (before array join), where (after array join) + query_info.has_filters_and_no_array_join_before_filter = row_policy_filter || additional_filter_info + || parallel_replicas_custom_filter_info + || (analysis_result.hasWhere() && !analysis_result.before_where->hasArrayJoin() && !analysis_result.array_join); storage->read(query_plan, required_columns, storage_snapshot, query_info, context, processing_stage, max_block_size, max_streams); if (context->hasQueryContext() && !options.is_internal) { - const String view_name{}; auto local_storage_id = storage->getStorageID(); context->getQueryContext()->addQueryAccessInfo( backQuoteIfNeed(local_storage_id.getDatabaseName()), local_storage_id.getFullTableName(), - required_columns, - query_info.projection ? query_info.projection->desc->name : "", - view_name); + required_columns); } /// Create step which reads from empty source if storage has no data. 
if (!query_plan.isInitialized()) { auto header = storage_snapshot->getSampleBlockForColumns(required_columns); - addEmptySourceToQueryPlan(query_plan, header, query_info, context); + addEmptySourceToQueryPlan(query_plan, header, query_info); } } else @@ -2756,13 +2664,8 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac expression_before_aggregation->setStepDescription("Before GROUP BY"); query_plan.addStep(std::move(expression_before_aggregation)); - if (options.is_projection_query) - return; - AggregateDescriptions aggregates = query_analyzer->aggregates(); - const Settings & settings = context->getSettingsRef(); - const auto & keys = query_analyzer->aggregationKeys().getNames(); auto aggregator_params = getAggregatorParams( @@ -2826,13 +2729,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets) { - /// If aggregate projection was chosen for table, avoid adding MergeAggregated. - /// It is already added by storage (because of performance issues). - /// TODO: We should probably add another one processing stage for storage? - /// WithMergeableStateAfterAggregation is not ok because, e.g., it skips sorting after aggregation. - if (query_info.projection && query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) - return; - const Settings & settings = context->getSettingsRef(); /// Used to determine if we should use memory bound merging strategy. @@ -2985,7 +2881,15 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) // has suitable sorting. Also don't create sort steps when there are no // columns to sort by, because the sort nodes are confused by this. It // happens in case of `over ()`. - if (!window.full_sort_description.empty() && (i == 0 || !sortIsPrefix(window, *windows_sorted[i - 1]))) + // Even if full_sort_description of both windows match, in case of different + // partitioning we need to add a SortingStep to reshuffle data in the streams. 
+ bool need_sort = !window.full_sort_description.empty(); + if (need_sort && i != 0) + { + need_sort = !sortIsPrefix(window, *windows_sorted[i - 1]) + || (settings.max_threads != 1 && window.partition_by.size() != windows_sorted[i - 1]->partition_by.size()); + } + if (need_sort) { SortingStep::Settings sort_settings(*context); @@ -3333,5 +3237,13 @@ bool InterpreterSelectQuery::isQueryWithFinal(const SelectQueryInfo & info) return result; } +void registerInterpreterSelectQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.options); + }; + factory.registerInterpreter("InterpreterSelectQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index ec9612ad248..fbb53d71755 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -117,7 +117,7 @@ public: bool hasAggregation() const { return query_analyzer->hasAggregation(); } static void addEmptySourceToQueryPlan( - QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info, const ContextPtr & context_); + QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info); Names getRequiredColumns() { return required_columns; } diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index eed9d03ab5a..868ef170f7c 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -143,7 +144,7 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, /// because it can lead to a changed header. 
if (select_query_options.ignore_ast_optimizations || context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - query_tree_pass_manager.run(query_tree, 1 /*up_to_pass_index*/); + query_tree_pass_manager.runOnlyResolve(query_tree); else query_tree_pass_manager.run(query_tree); @@ -267,4 +268,13 @@ void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & el elem.used_row_policies.emplace(used_row_policy); } +void registerInterpreterSelectQueryAnalyzer(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.options); + }; + factory.registerInterpreter("InterpreterSelectQueryAnalyzer", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 2ae74955e4f..16bc4b1fe2e 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -410,4 +411,13 @@ void InterpreterSelectWithUnionQuery::extendQueryLogElemImpl(QueryLogElement & e } } +void registerInterpreterSelectWithUnionQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.options); + }; + factory.registerInterpreter("InterpreterSelectWithUnionQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 2c0baa0d4b3..261c781e0ba 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -91,4 +92,12 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta } } +void registerInterpreterSetQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterSetQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index a5b22387448..149ba6d7575 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -164,5 +165,13 @@ BlockIO InterpreterShowColumnsQuery::execute() return executeQuery(getRewrittenQuery(), getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowColumnsQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowColumnsQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 0d60f13af66..9edac1fd8e1 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -104,4 +105,13 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() "statement"}})); } +void registerInterpreterShowCreateQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, 
args.context); + }; + + factory.registerInterpreter("InterpreterShowCreateQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowEngineQuery.cpp b/src/Interpreters/InterpreterShowEngineQuery.cpp index 2927fbd0f2d..f2d057a3fcf 100644 --- a/src/Interpreters/InterpreterShowEngineQuery.cpp +++ b/src/Interpreters/InterpreterShowEngineQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -15,4 +16,13 @@ BlockIO InterpreterShowEnginesQuery::execute() return executeQuery("SELECT * FROM system.table_engines ORDER BY name", getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowEnginesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowEnginesQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterShowFunctionsQuery.cpp b/src/Interpreters/InterpreterShowFunctionsQuery.cpp index a9da01b0988..e83f61eac53 100644 --- a/src/Interpreters/InterpreterShowFunctionsQuery.cpp +++ b/src/Interpreters/InterpreterShowFunctionsQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -43,4 +44,13 @@ FROM {}.{})", return rewritten_query; } +void registerInterpreterShowFunctionsQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowFunctionsQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 09b70e951db..e8005ead91e 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -104,5 +105,13 @@ BlockIO InterpreterShowIndexesQuery::execute() return executeQuery(getRewrittenQuery(), getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowIndexesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowIndexesQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.cpp b/src/Interpreters/InterpreterShowProcesslistQuery.cpp index f711cc0dac9..7bdb94482da 100644 --- a/src/Interpreters/InterpreterShowProcesslistQuery.cpp +++ b/src/Interpreters/InterpreterShowProcesslistQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -15,4 +16,13 @@ BlockIO InterpreterShowProcesslistQuery::execute() return executeQuery("SELECT * FROM system.processes ORDER BY elapsed DESC", getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowProcesslistQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowProcesslistQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterShowSettingQuery.cpp b/src/Interpreters/InterpreterShowSettingQuery.cpp index 45e9b8a1f1c..90acaa7b083 100644 --- a/src/Interpreters/InterpreterShowSettingQuery.cpp +++ b/src/Interpreters/InterpreterShowSettingQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -29,5 +30,13 @@ BlockIO InterpreterShowSettingQuery::execute() return executeQuery(getRewrittenQuery(), 
getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowSettingQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowSettingQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 0ca6578128d..51038aaca46 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -221,4 +222,14 @@ BlockIO InterpreterShowTablesQuery::execute() /// sort the output of SHOW otherwise (SELECT * FROM (SHOW ...) ORDER BY ...) is rejected) and 3. some /// SQL tests can take advantage of this. + +void registerInterpreterShowTablesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowTablesQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index db02ee13a4f..c1fba0c8c5a 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -73,6 +74,10 @@ #include #endif +#if USE_JEMALLOC +#include +#endif + #include "config.h" namespace CurrentMetrics @@ -97,7 +102,6 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } - namespace ActionLocks { extern const StorageActionBlockType PartsMerge; @@ -727,6 +731,33 @@ BlockIO InterpreterSystemQuery::execute() resetCoverage(); break; } + +#if USE_JEMALLOC + case Type::JEMALLOC_PURGE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + purgeJemallocArenas(); + break; + } + case Type::JEMALLOC_ENABLE_PROFILE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + setJemallocProfileActive(true); + break; + } + case Type::JEMALLOC_DISABLE_PROFILE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + setJemallocProfileActive(false); + break; + } + case Type::JEMALLOC_FLUSH_PROFILE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + flushJemallocProfile("/tmp/jemalloc_clickhouse"); + break; + } +#endif default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type of SYSTEM query"); } @@ -1039,7 +1070,7 @@ void InterpreterSystemQuery::syncReplica(ASTSystemQuery & query) { LOG_TRACE(log, "Synchronizing entries in replica's queue with table's log and waiting for current last entry to be processed"); auto sync_timeout = getContext()->getSettingsRef().receive_timeout.totalMilliseconds(); - if (!storage_replicated->waitForProcessingQueue(sync_timeout, query.sync_replica_mode)) + if (!storage_replicated->waitForProcessingQueue(sync_timeout, query.sync_replica_mode, query.src_replicas)) { LOG_ERROR(log, "SYNC REPLICA {}: Timed out!", table_id.getNameForLogs()); throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "SYNC REPLICA {}: command timed out. 
" \ @@ -1367,6 +1398,16 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_LISTEN); break; } +#if USE_JEMALLOC + case Type::JEMALLOC_PURGE: + case Type::JEMALLOC_ENABLE_PROFILE: + case Type::JEMALLOC_DISABLE_PROFILE: + case Type::JEMALLOC_FLUSH_PROFILE: + { + required_access.emplace_back(AccessType::SYSTEM_JEMALLOC); + break; + } +#endif case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: @@ -1378,4 +1419,13 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() return required_access; } +void registerInterpreterSystemQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterSystemQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index a0a82121ba8..b1758013f18 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -135,4 +136,13 @@ BlockIO InterpreterTransactionControlQuery::executeSetSnapshot(ContextMutablePtr return {}; } +void registerInterpreterTransactionControlQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterTransactionControlQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index bdd72b6d3ea..8401c47df6b 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -72,4 +73,13 @@ AccessRightsElements InterpreterUndropQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::UNDROP_TABLE, undrop.getDatabase(), undrop.getTable()); return required_access; } + +void registerInterpreterUndropQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterUndropQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterUseQuery.cpp b/src/Interpreters/InterpreterUseQuery.cpp index b71f3a9cc1c..58be12927b9 100644 --- a/src/Interpreters/InterpreterUseQuery.cpp +++ b/src/Interpreters/InterpreterUseQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -16,4 +17,13 @@ BlockIO InterpreterUseQuery::execute() return {}; } +void registerInterpreterUseQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterUseQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index 8865c47a785..2b68c5d7a10 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -12,6 +12,7 @@ limitations under the License. 
*/ #include #include #include +#include #include #include #include @@ -103,4 +104,13 @@ QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() return pipeline; } +void registerInterpreterWatchQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterWatchQuery", create_fn); +} + } diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 6bd202a1dd7..0aee96ee9c4 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -747,15 +747,8 @@ void NotJoinedBlocks::extractColumnChanges(size_t right_pos, size_t result_pos) void NotJoinedBlocks::correctLowcardAndNullability(Block & block) { - for (auto & [pos, added] : right_nullability_changes) - { - auto & col = block.getByPosition(pos); - if (added) - JoinCommon::convertColumnToNullable(col); - else - JoinCommon::removeColumnNullability(col); - } - + /// First correct LowCardinality, then Nullability, + /// because LowCardinality(Nullable(T)) is possible, but not Nullable(LowCardinality(T)) for (auto & [pos, added] : right_lowcard_changes) { auto & col = block.getByPosition(pos); @@ -771,6 +764,15 @@ void NotJoinedBlocks::correctLowcardAndNullability(Block & block) col.type = recursiveRemoveLowCardinality(col.type); } } + + for (auto & [pos, added] : right_nullability_changes) + { + auto & col = block.getByPosition(pos); + if (added) + JoinCommon::convertColumnToNullable(col); + else + JoinCommon::removeColumnNullability(col); + } } void NotJoinedBlocks::addLeftColumns(Block & block, size_t rows_added) const diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index f0427b5a6ca..4a80e1a3c56 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -1124,7 +1124,7 @@ IBlocksStreamPtr MergeJoin::getNonJoinedBlocks( if (table_join->strictness() == JoinStrictness::All && (is_right || is_full)) { size_t left_columns_count = left_sample_block.columns(); - assert(left_columns_count == result_sample_block.columns() - right_columns_to_add.columns()); + chassert(left_columns_count == result_sample_block.columns() - right_columns_to_add.columns()); auto non_joined = std::make_unique(*this, max_block_size); return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); } diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 7993bda4bd9..5f6db0da520 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -12,32 +12,30 @@ namespace DB { -NamesAndTypesList MetricLogElement::getNamesAndTypes() +ColumnsDescription MetricLogElement::getColumnsDescription() { - NamesAndTypesList columns_with_type_and_name; + ColumnsDescription result; - columns_with_type_and_name.emplace_back("hostname", std::make_shared(std::make_shared())); - columns_with_type_and_name.emplace_back("event_date", std::make_shared()); - columns_with_type_and_name.emplace_back("event_time", std::make_shared()); - columns_with_type_and_name.emplace_back("event_time_microseconds", std::make_shared(6)); + result.add({"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}); + result.add({"event_date", std::make_shared(), "Event date."}); + result.add({"event_time", std::make_shared(), "Event time."}); + result.add({"event_time_microseconds", std::make_shared(6), "Event time with microseconds resolution."}); 
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) { - std::string name; - name += "ProfileEvent_"; - name += ProfileEvents::getName(ProfileEvents::Event(i)); - columns_with_type_and_name.emplace_back(std::move(name), std::make_shared()); + auto name = fmt::format("ProfileEvent_{}", ProfileEvents::getName(ProfileEvents::Event(i))); + const auto * comment = ProfileEvents::getDocumentation(ProfileEvents::Event(i)); + result.add({std::move(name), std::make_shared(), comment}); } for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) { - std::string name; - name += "CurrentMetric_"; - name += CurrentMetrics::getName(CurrentMetrics::Metric(i)); - columns_with_type_and_name.emplace_back(std::move(name), std::make_shared()); + auto name = fmt::format("CurrentMetric_{}", CurrentMetrics::getName(CurrentMetrics::Metric(i))); + const auto * comment = CurrentMetrics::getDocumentation(CurrentMetrics::Metric(i)); + result.add({std::move(name), std::make_shared(), comment}); } - return columns_with_type_and_name; + return result; } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index a57f1cebf71..482681d8276 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,7 @@ struct MetricLogElement std::vector current_metrics; static std::string name() { return "MetricLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index a6ea03f8a03..86cd2d84fa3 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -190,7 +190,7 @@ bool isStorageTouchedByMutations( if (context->getSettingsRef().allow_experimental_analyzer) { auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage.shared_from_this(), context); - InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits().ignoreProjections()); + InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits()); io = interpreter.execute(); } else @@ -200,7 +200,7 @@ bool isStorageTouchedByMutations( /// For some reason it may copy context and give it into ExpressionTransform /// after that we will use context from destroyed stack frame in our stream. 
interpreter_select_query.emplace( - select_query, context, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); + select_query, context, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits()); io = interpreter_select_query->execute(); } @@ -262,7 +262,7 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) auto alter_command = std::make_shared(); alter_command->type = ASTAlterCommand::DELETE; - alter_command->partition = command.partition; + alter_command->partition = alter_command->children.emplace_back(command.partition).get(); auto row_exists_predicate = makeASTFunction("equals", std::make_shared(LightweightDeleteDescription::FILTER_COLUMN.name), @@ -271,7 +271,7 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) if (command.predicate) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Mutation command APPLY DELETED MASK does not support WHERE clause"); - alter_command->predicate = row_exists_predicate; + alter_command->predicate = alter_command->children.emplace_back(std::move(row_exists_predicate)).get(); auto mutation_command = MutationCommand::parse(alter_command.get()); if (!mutation_command) @@ -404,7 +404,7 @@ MutationsInterpreter::MutationsInterpreter( , available_columns(std::move(available_columns_)) , context(Context::createCopy(context_)) , settings(std::move(settings_)) - , select_limits(SelectQueryOptions().analyze(!settings.can_execute).ignoreLimits().ignoreProjections()) + , select_limits(SelectQueryOptions().analyze(!settings.can_execute).ignoreLimits()) { prepare(!settings.can_execute); } diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.cpp b/src/Interpreters/MutationsNonDeterministicHelpers.cpp index 119759265ef..7a4cb91acc0 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.cpp +++ b/src/Interpreters/MutationsNonDeterministicHelpers.cpp @@ -172,6 +172,30 @@ ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, C auto query = alter_command.clone(); auto & new_alter_command = *query->as(); + auto remove_child = [](auto & children, IAST *& erase_ptr) + { + auto it = std::find_if(children.begin(), children.end(), [&](const auto & ptr) { return ptr.get() == erase_ptr; }); + erase_ptr = nullptr; + children.erase(it); + }; + auto visit = [&](auto & visitor) + { + if (new_alter_command.update_assignments) + { + ASTPtr update_assignments = new_alter_command.update_assignments->clone(); + remove_child(new_alter_command.children, new_alter_command.update_assignments); + visitor.visit(update_assignments); + new_alter_command.update_assignments = new_alter_command.children.emplace_back(std::move(update_assignments)).get(); + } + if (new_alter_command.predicate) + { + ASTPtr predicate = new_alter_command.predicate->clone(); + remove_child(new_alter_command.children, new_alter_command.predicate); + visitor.visit(predicate); + new_alter_command.predicate = new_alter_command.children.emplace_back(std::move(predicate)).get(); + } + }; + if (settings.mutations_execute_subqueries_on_initiator) { Scalars scalars; @@ -188,10 +212,7 @@ ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, C settings.mutations_max_literal_size_to_replace}; ExecuteScalarSubqueriesVisitor visitor(data); - if (new_alter_command.update_assignments) - visitor.visit(new_alter_command.update_assignments); - if (new_alter_command.predicate) - visitor.visit(new_alter_command.predicate); + visit(visitor); } if 
(settings.mutations_execute_nondeterministic_on_initiator) @@ -200,10 +221,7 @@ ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, C context, settings.mutations_max_literal_size_to_replace}; ExecuteNonDeterministicConstFunctionsVisitor visitor(data); - if (new_alter_command.update_assignments) - visitor.visit(new_alter_command.update_assignments); - if (new_alter_command.predicate) - visitor.visit(new_alter_command.predicate); + visit(visitor); } return query; diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index d95a7a42159..0fdc9347ee9 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -638,7 +638,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( auto rewritten_command = std::make_shared(); rewritten_command->type = ASTAlterCommand::ADD_COLUMN; rewritten_command->first = alter_command->first; - rewritten_command->col_decl = additional_columns->children[index]->clone(); + rewritten_command->col_decl = rewritten_command->children.emplace_back(additional_columns->children[index]->clone()).get(); const auto & column_declare = alter_command->additional_columns->children[index]->as(); if (column_declare && column_declare->column_options) @@ -667,8 +667,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( if (!alter_command->column_name.empty()) { - rewritten_command->column = std::make_shared(alter_command->column_name); - rewritten_command->children.push_back(rewritten_command->column); + rewritten_command->column = rewritten_command->children.emplace_back(std::make_shared(alter_command->column_name)).get(); /// For example(when add_column_1 is last column): /// ALTER TABLE test_database.test_table_2 ADD COLUMN add_column_3 INT AFTER add_column_1, ADD COLUMN add_column_4 INT @@ -679,12 +678,10 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( } else { - rewritten_command->column = std::make_shared(default_after_column); - rewritten_command->children.push_back(rewritten_command->column); + rewritten_command->column = rewritten_command->children.emplace_back(std::make_shared(default_after_column)).get(); default_after_column = rewritten_command->col_decl->as()->name; } - rewritten_command->children.push_back(rewritten_command->col_decl); rewritten_alter_query->command_list->children.push_back(rewritten_command); } } @@ -692,7 +689,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( { auto rewritten_command = std::make_shared(); rewritten_command->type = ASTAlterCommand::DROP_COLUMN; - rewritten_command->column = std::make_shared(alter_command->column_name); + rewritten_command->column = rewritten_command->children.emplace_back(std::make_shared(alter_command->column_name)).get(); rewritten_alter_query->command_list->children.push_back(rewritten_command); } else if (alter_command->type == MySQLParser::ASTAlterCommand::RENAME_COLUMN) @@ -702,8 +699,8 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( /// 'RENAME column_name TO column_name' is not allowed in Clickhouse auto rewritten_command = std::make_shared(); rewritten_command->type = ASTAlterCommand::RENAME_COLUMN; - rewritten_command->column = std::make_shared(alter_command->old_name); - rewritten_command->rename_to = std::make_shared(alter_command->column_name); + rewritten_command->column = rewritten_command->children.emplace_back(std::make_shared(alter_command->old_name)).get(); + rewritten_command->rename_to = 
rewritten_command->children.emplace_back(std::make_shared(alter_command->column_name)).get(); rewritten_alter_query->command_list->children.push_back(rewritten_command); } } @@ -726,13 +723,10 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( modify_columns.front().name = alter_command->old_name; const auto & modify_columns_description = createColumnsDescription(modify_columns, alter_command->additional_columns); - rewritten_command->col_decl = InterpreterCreateQuery::formatColumns(modify_columns_description)->children[0]; + rewritten_command->col_decl = rewritten_command->children.emplace_back(InterpreterCreateQuery::formatColumns(modify_columns_description)->children[0]).get(); if (!alter_command->column_name.empty()) - { - rewritten_command->column = std::make_shared(alter_command->column_name); - rewritten_command->children.push_back(rewritten_command->column); - } + rewritten_command->column = rewritten_command->children.emplace_back(std::make_shared(alter_command->column_name)).get(); rewritten_alter_query->command_list->children.push_back(rewritten_command); } @@ -741,8 +735,8 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( { auto rewritten_command = std::make_shared(); rewritten_command->type = ASTAlterCommand::RENAME_COLUMN; - rewritten_command->column = std::make_shared(alter_command->old_name); - rewritten_command->rename_to = std::make_shared(new_column_name); + rewritten_command->column = rewritten_command->children.emplace_back(std::make_shared(alter_command->old_name)).get(); + rewritten_command->rename_to = rewritten_command->children.emplace_back(std::make_shared(new_column_name)).get(); rewritten_alter_query->command_list->children.push_back(rewritten_command); } } diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 40aaa63dd6e..fffc1e50da0 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -15,7 +15,7 @@ namespace DB { -NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes() +ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() { auto span_kind_type = std::make_shared( DataTypeEnum8::Values @@ -30,7 +30,8 @@ NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes() auto low_cardinality_string = std::make_shared(std::make_shared()); - return { + return ColumnsDescription + { {"hostname", low_cardinality_string}, {"trace_id", std::make_shared()}, {"span_id", std::make_shared()}, diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h index 7368b184e5e..4907a8feb5a 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.h +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -15,7 +16,8 @@ struct OpenTelemetrySpanLogElement : public OpenTelemetry::Span : OpenTelemetry::Span(span) {} static std::string name() { return "OpenTelemetrySpanLog"; } - static NamesAndTypesList getNamesAndTypes(); + + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 338775bfb0c..9819b8e3ec4 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -57,7 +57,7 @@ PartLogElement::PartMergeAlgorithm PartLogElement::getMergeAlgorithm(MergeAlgori throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown 
MergeAlgorithm {}", static_cast(merge_algorithm_)); } -NamesAndTypesList PartLogElement::getNamesAndTypes() +ColumnsDescription PartLogElement::getColumnsDescription() { auto event_type_datatype = std::make_shared( DataTypeEnum8::Values @@ -92,44 +92,57 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() ColumnsWithTypeAndName columns_with_type_and_name; - return { - {"hostname", std::make_shared(std::make_shared())}, - {"query_id", std::make_shared()}, - {"event_type", std::move(event_type_datatype)}, - {"merge_reason", std::move(merge_reason_datatype)}, - {"merge_algorithm", std::move(merge_algorithm_datatype)}, - {"event_date", std::make_shared()}, + return ColumnsDescription + { + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"query_id", std::make_shared(), "Identifier of the INSERT query that created this data part."}, + {"event_type", std::move(event_type_datatype), + "Type of the event that occurred with the data part. " + "Can have one of the following values: " + "NewPart — Inserting of a new data part, " + "MergeParts — Merging of data parts, " + "DownloadParts — Downloading a data part, " + "RemovePart — Removing or detaching a data part using DETACH PARTITION, " + "MutatePart — Mutating of a data part, " + "MovePart — Moving the data part from one disk to another."}, + {"merge_reason", std::move(merge_reason_datatype), + "The reason for the event with type MERGE_PARTS. Can have one of the following values: " + "NotAMerge — The current event has the type other than MERGE_PARTS, " + "RegularMerge — Some regular merge, " + "TTLDeleteMerge — Cleaning up expired data, " + "TTLRecompressMerge — Recompressing the data part with the TTL expressions."}, + {"merge_algorithm", std::move(merge_algorithm_datatype), "Merge algorithm for the event with type MERGE_PARTS. Can have one of the following values: Undecided, Horizontal, Vertical."}, + {"event_date", std::make_shared(), "Event date."}, + {"event_time", std::make_shared(), "Event time."}, + {"event_time_microseconds", std::make_shared(6), "Event time with microseconds precision."}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"duration_ms", std::make_shared(), "Duration of this operation."}, - {"duration_ms", std::make_shared()}, - - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"table_uuid", std::make_shared()}, - {"part_name", std::make_shared()}, - {"partition_id", std::make_shared()}, + {"database", std::make_shared(), "Name of the database the data part is in."}, + {"table", std::make_shared(), "Name of the table the data part is in."}, + {"table_uuid", std::make_shared(), "UUID of the table the data part belongs to."}, + {"part_name", std::make_shared(), "Name of the data part."}, + {"partition_id", std::make_shared(), "ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`."}, {"partition", std::make_shared()}, - {"part_type", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path_on_disk", std::make_shared()}, + {"part_type", std::make_shared(), "The type of the part. 
Possible values: Wide and Compact."}, + {"disk_name", std::make_shared(), "The name of the disk the data part lies on."}, + {"path_on_disk", std::make_shared(), "Absolute path to the folder with data part files."}, - {"rows", std::make_shared()}, - {"size_in_bytes", std::make_shared()}, // On disk + {"rows", std::make_shared(), "The number of rows in the data part."}, + {"size_in_bytes", std::make_shared(), "Size of the data part on disk in bytes."}, /// Merge-specific info - {"merged_from", std::make_shared(std::make_shared())}, - {"bytes_uncompressed", std::make_shared()}, // Result bytes - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, + {"merged_from", std::make_shared(std::make_shared()), "An array of the names of the source parts which the current part was made up from."}, + {"bytes_uncompressed", std::make_shared(), "Uncompressed size of the resulting part in bytes."}, + {"read_rows", std::make_shared(), "The number of rows read during the merge."}, + {"read_bytes", std::make_shared(), "The number of bytes read during the merge."}, + {"peak_memory_usage", std::make_shared(), "The maximum amount of RAM used during the merge."}, /// Is there an error during the execution or commit - {"error", std::make_shared()}, - {"exception", std::make_shared()}, + {"error", std::make_shared(), "The error code of the exception that occurred."}, + {"exception", std::make_shared(), "Text message of the error that occurred."}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "All the profile events captured during this operation."}, }; } diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 462314f2768..d4cd571d69b 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -93,7 +93,7 @@ struct PartLogElement static MergeReasonType getMergeReasonType(MergeType merge_type); static PartMergeAlgorithm getMergeAlgorithm(MergeAlgorithm merge_algorithm_); - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 68b5d63e613..088d193257c 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -17,9 +17,9 @@ namespace DB { -NamesAndTypesList ProcessorProfileLogElement::getNamesAndTypes() +ColumnsDescription ProcessorProfileLogElement::getColumnsDescription() { - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 63791c0374c..49d2c21af89 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -36,7 +37,7 @@ struct ProcessorProfileLogElement size_t output_bytes{}; static std::string name() { return "ProcessorsProfileLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; 
} diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 1e259bb510e..ad6e344655b 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -31,7 +31,7 @@ namespace DB { -NamesAndTypesList QueryLogElement::getNamesAndTypes() +ColumnsDescription QueryLogElement::getColumnsDescription() { auto query_status_datatype = std::make_shared( DataTypeEnum8::Values @@ -54,91 +54,91 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() auto low_cardinality_string = std::make_shared(std::make_shared()); auto array_low_cardinality_string = std::make_shared(low_cardinality_string); - return + return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"type", std::move(query_status_datatype)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"query_start_time", std::make_shared()}, - {"query_start_time_microseconds", std::make_shared(6)}, - {"query_duration_ms", std::make_shared()}, + {"hostname", low_cardinality_string, "Hostname of the server executing the query."}, + {"type", std::move(query_status_datatype), "Type of an event that occurred when executing the query."}, + {"event_date", std::make_shared(), "Query starting date."}, + {"event_time", std::make_shared(), "Query starting time."}, + {"event_time_microseconds", std::make_shared(6), "Query starting time with microseconds precision."}, + {"query_start_time", std::make_shared(), "Start time of query execution."}, + {"query_start_time_microseconds", std::make_shared(6), "Start time of query execution with microsecond precision."}, + {"query_duration_ms", std::make_shared(), "Duration of query execution in milliseconds."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"result_rows", std::make_shared()}, - {"result_bytes", std::make_shared()}, - {"memory_usage", std::make_shared()}, + {"read_rows", std::make_shared(), "Total number of rows read from all tables and table functions participating in the query. It includes usual subqueries, subqueries for IN and JOIN. For distributed queries read_rows includes the total number of rows read at all replicas. Each replica sends its read_rows value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value."}, + {"read_bytes", std::make_shared(), "Total number of bytes read from all tables and table functions participating in the query. It includes usual subqueries, subqueries for IN and JOIN. For distributed queries read_bytes includes the total number of bytes read at all replicas. Each replica sends its read_bytes value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value."}, + {"written_rows", std::make_shared(), "For INSERT queries, the number of written rows. For other queries, the column value is 0."}, + {"written_bytes", std::make_shared(), "For INSERT queries, the number of written bytes (uncompressed). 
For other queries, the column value is 0."}, + {"result_rows", std::make_shared(), "Number of rows in a result of the SELECT query, or a number of rows in the INSERT query."}, + {"result_bytes", std::make_shared(), "RAM volume in bytes used to store a query result."}, + {"memory_usage", std::make_shared(), "Memory consumption by the query."}, - {"current_database", low_cardinality_string}, - {"query", std::make_shared()}, - {"formatted_query", std::make_shared()}, - {"normalized_query_hash", std::make_shared()}, - {"query_kind", low_cardinality_string}, - {"databases", array_low_cardinality_string}, - {"tables", array_low_cardinality_string}, - {"columns", array_low_cardinality_string}, - {"partitions", array_low_cardinality_string}, - {"projections", array_low_cardinality_string}, - {"views", array_low_cardinality_string}, - {"exception_code", std::make_shared()}, - {"exception", std::make_shared()}, - {"stack_trace", std::make_shared()}, + {"current_database", low_cardinality_string, "Name of the current database."}, + {"query", std::make_shared(), " Query string."}, + {"formatted_query", std::make_shared(), "Formatted query string."}, + {"normalized_query_hash", std::make_shared(), "Identical hash value without the values of literals for similar queries."}, + {"query_kind", low_cardinality_string, "Type of the query."}, + {"databases", array_low_cardinality_string, "Names of the databases present in the query."}, + {"tables", array_low_cardinality_string, "Names of the tables present in the query."}, + {"columns", array_low_cardinality_string, "Names of the columns present in the query."}, + {"partitions", array_low_cardinality_string, "Names of the partitions present in the query."}, + {"projections", array_low_cardinality_string, "Names of the projections used during the query execution."}, + {"views", array_low_cardinality_string, "Names of the (materialized or live) views present in the query."}, + {"exception_code", std::make_shared(), "Code of an exception."}, + {"exception", std::make_shared(), "Exception message."}, + {"stack_trace", std::make_shared(), "Stack trace. An empty string, if the query was completed successfully."}, - {"is_initial_query", std::make_shared()}, - {"user", low_cardinality_string}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared()}, - {"initial_user", low_cardinality_string}, - {"initial_query_id", std::make_shared()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared()}, - {"initial_query_start_time", std::make_shared()}, - {"initial_query_start_time_microseconds", std::make_shared(6)}, - {"interface", std::make_shared()}, - {"is_secure", std::make_shared()}, - {"os_user", low_cardinality_string}, - {"client_hostname", low_cardinality_string}, - {"client_name", low_cardinality_string}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, - {"http_method", std::make_shared()}, - {"http_user_agent", low_cardinality_string}, - {"http_referer", std::make_shared()}, - {"forwarded_for", std::make_shared()}, - {"quota_key", std::make_shared()}, - {"distributed_depth", std::make_shared()}, + {"is_initial_query", std::make_shared(), "Query type. 
Possible values: 1 — query was initiated by the client, 0 — query was initiated by another query as part of distributed query execution."}, + {"user", low_cardinality_string, "Name of the user who initiated the current query."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address that was used to make the query."}, + {"port", std::make_shared(), "The client port that was used to make the query."}, + {"initial_user", low_cardinality_string, "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared(), "The client port that was used to make the parent query."}, + {"initial_query_start_time", std::make_shared(), "Initial query starting time (for distributed query execution)."}, + {"initial_query_start_time_microseconds", std::make_shared(6), "Initial query starting time with microseconds precision (for distributed query execution)."}, + {"interface", std::make_shared(), "Interface that the query was initiated from. Possible values: 1 — TCP, 2 — HTTP."}, + {"is_secure", std::make_shared(), "The flag whether a query was executed over a secure interface"}, + {"os_user", low_cardinality_string, "Operating system username who runs clickhouse-client."}, + {"client_hostname", low_cardinality_string, "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", low_cardinality_string, "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface, 1 — GET method was used, 2 — POST method was used."}, + {"http_user_agent", low_cardinality_string, "HTTP header UserAgent passed in the HTTP query."}, + {"http_referer", std::make_shared(), "HTTP header Referer passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header X-Forwarded-For passed in the HTTP query."}, + {"quota_key", std::make_shared(), "The quota key specified in the quotas setting (see keyed)."}, + {"distributed_depth", std::make_shared(), "How many times a query was forwarded between servers."}, - {"revision", std::make_shared()}, + {"revision", std::make_shared(), "ClickHouse revision."}, - {"log_comment", std::make_shared()}, + {"log_comment", std::make_shared(), "Log comment. It can be set to arbitrary string no longer than max_query_size. 
An empty string if it is not defined."}, - {"thread_ids", std::make_shared(std::make_shared())}, - {"peak_threads_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared())}, - {"Settings", std::make_shared(low_cardinality_string, low_cardinality_string)}, + {"thread_ids", std::make_shared(std::make_shared()), "Thread ids that are participating in query execution. These threads may not have run simultaneously."}, + {"peak_threads_usage", std::make_shared(), "Maximum count of simultaneous threads executing the query."}, + {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared()), "ProfileEvents that measure different metrics. The description of them could be found in the table system.events"}, + {"Settings", std::make_shared(low_cardinality_string, low_cardinality_string), "Settings that were changed when the client ran the query. To enable logging changes to settings, set the log_query_settings parameter to 1."}, - {"used_aggregate_functions", array_low_cardinality_string}, - {"used_aggregate_function_combinators", array_low_cardinality_string}, - {"used_database_engines", array_low_cardinality_string}, - {"used_data_type_families", array_low_cardinality_string}, - {"used_dictionaries", array_low_cardinality_string}, - {"used_formats", array_low_cardinality_string}, - {"used_functions", array_low_cardinality_string}, - {"used_storages", array_low_cardinality_string}, - {"used_table_functions", array_low_cardinality_string}, + {"used_aggregate_functions", array_low_cardinality_string, "Canonical names of aggregate functions, which were used during query execution."}, + {"used_aggregate_function_combinators", array_low_cardinality_string, "Canonical names of aggregate functions combinators, which were used during query execution."}, + {"used_database_engines", array_low_cardinality_string, "Canonical names of database engines, which were used during query execution."}, + {"used_data_type_families", array_low_cardinality_string, "Canonical names of data type families, which were used during query execution."}, + {"used_dictionaries", array_low_cardinality_string, "Canonical names of dictionaries, which were used during query execution."}, + {"used_formats", array_low_cardinality_string, "Canonical names of formats, which were used during query execution."}, + {"used_functions", array_low_cardinality_string, "Canonical names of functions, which were used during query execution."}, + {"used_storages", array_low_cardinality_string, "Canonical names of storages, which were used during query execution."}, + {"used_table_functions", array_low_cardinality_string, "Canonical names of table functions, which were used during query execution."}, {"used_row_policies", array_low_cardinality_string}, {"transaction_id", getTransactionIDDataType()}, - {"query_cache_usage", std::move(query_cache_usage_datatype)}, + {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache during query execution. 
Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."}, {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())}, }; diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index fe9b7cbdbc8..be5cb5835c5 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace ProfileEvents @@ -102,7 +103,7 @@ struct QueryLogElement static std::string name() { return "QueryLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index eed2a38e6da..d153e30a4ce 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -22,11 +22,11 @@ namespace DB { -NamesAndTypesList QueryThreadLogElement::getNamesAndTypes() +ColumnsDescription QueryThreadLogElement::getColumnsDescription() { auto low_cardinality_string = std::make_shared(std::make_shared()); - return + return ColumnsDescription { {"hostname", low_cardinality_string}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index 684d7fce53e..fcce9232dc1 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -5,7 +5,7 @@ #include #include #include - +#include namespace DB { @@ -46,7 +46,7 @@ struct QueryThreadLogElement static std::string name() { return "QueryThreadLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/QueryViewsLog.cpp b/src/Interpreters/QueryViewsLog.cpp index 7ad3e668bbb..c426f2d3cf0 100644 --- a/src/Interpreters/QueryViewsLog.cpp +++ b/src/Interpreters/QueryViewsLog.cpp @@ -19,7 +19,7 @@ namespace DB { -NamesAndTypesList QueryViewsLogElement::getNamesAndTypes() +ColumnsDescription QueryViewsLogElement::getColumnsDescription() { auto view_status_datatype = std::make_shared(DataTypeEnum8::Values{ {"QueryStart", static_cast(QUERY_START)}, @@ -33,7 +33,8 @@ NamesAndTypesList QueryViewsLogElement::getNamesAndTypes() {"Live", static_cast(ViewType::LIVE)}, {"Window", static_cast(ViewType::WINDOW)}}); - return { + return ColumnsDescription + { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, @@ -57,7 +58,8 @@ NamesAndTypesList QueryViewsLogElement::getNamesAndTypes() {"status", std::move(view_status_datatype)}, {"exception_code", std::make_shared()}, {"exception", std::make_shared()}, - {"stack_trace", std::make_shared()}}; + {"stack_trace", std::make_shared()} + }; } NamesAndAliases QueryViewsLogElement::getNamesAndAliases() diff --git a/src/Interpreters/QueryViewsLog.h b/src/Interpreters/QueryViewsLog.h index e28bce0b91c..000d0bd385a 100644 --- a/src/Interpreters/QueryViewsLog.h +++ b/src/Interpreters/QueryViewsLog.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace ProfileEvents { 
@@ -77,7 +78,7 @@ struct QueryViewsLogElement static std::string name() { return "QueryLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/S3QueueLog.cpp index fdf74b2b926..967becb6e0f 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/S3QueueLog.cpp @@ -14,7 +14,7 @@ namespace DB { -NamesAndTypesList S3QueueLogElement::getNamesAndTypes() +ColumnsDescription S3QueueLogElement::getColumnsDescription() { auto status_datatype = std::make_shared( DataTypeEnum8::Values @@ -22,7 +22,9 @@ NamesAndTypesList S3QueueLogElement::getNamesAndTypes() {"Processed", static_cast(S3QueueLogElement::S3QueueStatus::Processed)}, {"Failed", static_cast(S3QueueLogElement::S3QueueStatus::Failed)}, }); - return { + + return ColumnsDescription + { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/S3QueueLog.h index 76ff5ca0cdc..e0362bf9716 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/S3QueueLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -28,7 +29,7 @@ struct S3QueueLogElement static std::string name() { return "S3QueueLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index c91329c869c..1e08aec3813 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -33,14 +33,6 @@ struct SelectQueryOptions bool remove_duplicates = false; bool ignore_quota = false; bool ignore_limits = false; - /// This flag is needed to analyze query ignoring table projections. - /// It is needed because we build another one InterpreterSelectQuery while analyzing projections. - /// It helps to avoid infinite recursion. - bool ignore_projections = false; - /// This flag is also used for projection analysis. - /// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform. - /// It is also used to avoid adding aggregating step when aggregate projection is chosen. - bool is_projection_query = false; /// This flag is needed for projection description. /// Otherwise, keys for GROUP BY may be removed as constants. bool ignore_ast_optimizations = false; @@ -119,18 +111,6 @@ struct SelectQueryOptions return *this; } - SelectQueryOptions & ignoreProjections(bool value = true) - { - ignore_projections = value; - return *this; - } - - SelectQueryOptions & projectionQuery(bool value = true) - { - is_projection_query = value; - return *this; - } - SelectQueryOptions & ignoreAlias(bool value = true) { ignore_alias = value; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 162772061b5..d2f9fe8b325 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -112,8 +112,7 @@ public: throw Exception(ErrorCodes::SESSION_NOT_FOUND, "Session {} not found", session_id); /// Create a new session from current context. 
- auto context = Context::createCopy(global_context); - it = sessions.insert(std::make_pair(key, std::make_shared(key, context, timeout, *this))).first; + it = sessions.insert(std::make_pair(key, std::make_shared(key, global_context, timeout, *this))).first; const auto & session = it->second; if (!thread.joinable()) @@ -128,7 +127,7 @@ public: /// Use existing session. const auto & session = it->second; - LOG_TEST(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); + LOG_TRACE(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); if (!session.unique()) throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session {} is locked by a concurrent client", session_id); @@ -703,6 +702,10 @@ void Session::releaseSessionID() { if (!named_session) return; + + prepared_client_info = getClientInfo(); + session_context.reset(); + named_session->release(); named_session = nullptr; } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 2249d8fbb2f..75e1414b8cb 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -8,6 +8,7 @@ #include #include +#include #include namespace Poco::Net { class SocketAddress; } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 61750b5acca..a5bc5012292 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -67,7 +67,7 @@ SessionLogElement::SessionLogElement(const UUID & auth_id_, Type type_) std::tie(event_time, event_time_microseconds) = eventTime(); } -NamesAndTypesList SessionLogElement::getNamesAndTypes() +ColumnsDescription SessionLogElement::getColumnsDescription() { auto event_type = std::make_shared( DataTypeEnum8::Values @@ -119,7 +119,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() std::make_shared() }))); - return + return ColumnsDescription { {"hostname", lc_string_datatype}, {"type", std::move(event_type)}, diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 8757bc12270..0f79a3e5ca7 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -59,7 +60,7 @@ struct SessionLogElement static std::string name() { return "SessionLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 4f283a3f78d..c06fe8f5c90 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -35,7 +35,9 @@ #include #include #include +#include #include +#include "Common/quoteString.h" #include #include #include @@ -118,7 +120,8 @@ std::shared_ptr createSystemLog( const String & default_database_name, const String & default_table_name, const Poco::Util::AbstractConfiguration & config, - const String & config_prefix) + const String & config_prefix, + const String & comment) { if (!config.has(config_prefix)) { @@ -208,10 +211,14 @@ std::shared_ptr createSystemLog( if (!settings.empty()) log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings; } + + /// Add comment to AST. So it will be saved when the table will be renamed. + log_settings.engine += fmt::format(" COMMENT {} ", quoteString(comment)); } /// Validate engine definition syntax to prevent some configuration errors. 
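    /// For illustration only (a hypothetical engine definition, not taken from any real config): after the COMMENT
    /// clause has been appended above, the string handed to the parser below looks roughly like
    ///     ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY event_time COMMENT 'Contains information about executed queries.'
    /// so a malformed engine definition from the server config is rejected here rather than at table creation time.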
ParserStorageWithComment storage_parser; + parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(), "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -267,32 +274,32 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config) { - query_log = createSystemLog(global_context, "system", "query_log", config, "query_log"); - query_thread_log = createSystemLog(global_context, "system", "query_thread_log", config, "query_thread_log"); - part_log = createSystemLog(global_context, "system", "part_log", config, "part_log"); - trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log"); - crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log"); - text_log = createSystemLog(global_context, "system", "text_log", config, "text_log"); - metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log"); - filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log"); + query_log = createSystemLog(global_context, "system", "query_log", config, "query_log", "Contains information about executed queries, for example, start time, duration of processing, error messages."); + query_thread_log = createSystemLog(global_context, "system", "query_thread_log", config, "query_thread_log", "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing."); + part_log = createSystemLog(global_context, "system", "part_log", config, "part_log", "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data."); + trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log", "Contains stack traces collected by the sampling query profiler."); + crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains information about stack traces for fatal errors. 
The table does not exist in the database by default, it is created only when fatal errors occur."); + text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); + metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains the history of metric values from the tables system.metrics and system.events, periodically flushed to disk."); + filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events that occurred with the filesystem cache for objects on a remote filesystem."); filesystem_read_prefetches_log = createSystemLog( - global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log"); + global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTree tables backed by a remote filesystem."); asynchronous_metric_log = createSystemLog( global_context, "system", "asynchronous_metric_log", config, - "asynchronous_metric_log"); + "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, which are saved once per minute."); opentelemetry_span_log = createSystemLog( global_context, "system", "opentelemetry_span_log", config, - "opentelemetry_span_log"); - query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log"); - zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log"); - session_log = createSystemLog(global_context, "system", "session_log", config, "session_log"); + "opentelemetry_span_log", "Contains information about trace spans for executed queries."); + query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log", "Contains information about the dependent views executed when running a query, for example, the view type or the execution time."); + zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log", "This table contains information about the parameters of the request to the ZooKeeper server and the response from it."); + session_log = createSystemLog(global_context, "system", "session_log", config, "session_log", "Contains information about all successful and failed login and logout events."); transactions_info_log = createSystemLog( - global_context, "system", "transactions_info_log", config, "transactions_info_log"); - processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log"); - asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log"); - backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log"); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log"); - blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log"); + global_context, "system", "transactions_info_log", config, "transactions_info_log", "Contains information about all transactions executed on the current server."); + processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level 
(building blocks for a pipeline for query execution)."); + asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history of all asynchronous inserts executed on the current server."); + backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations."); + s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with information about files processed by the S3Queue engine."); + blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); if (query_log) logs.emplace_back(query_log.get()); @@ -484,9 +491,9 @@ void SystemLog::flushImpl(const std::vector & to_flush, prepareTable(); ColumnsWithTypeAndName log_element_columns; - auto log_element_names_and_types = LogElement::getNamesAndTypes(); + auto log_element_names_and_types = LogElement::getColumnsDescription(); - for (const auto & name_and_type : log_element_names_and_types) + for (const auto & name_and_type : log_element_names_and_types.getAll()) log_element_columns.emplace_back(name_and_type.type, name_and_type.name); Block block(std::move(log_element_columns)); @@ -547,6 +554,8 @@ void SystemLog::prepareTable() if (old_create_query != create_query) { + /// TODO: Handle altering the comment, because otherwise the whole table will be renamed. + /// Rename the existing table. int suffix = 0; while (DatabaseCatalog::instance().isTableExist( @@ -626,22 +635,11 @@ ASTPtr SystemLog::getCreateTableQuery() create->setTable(table_id.table_name); auto new_columns_list = std::make_shared(); + auto ordinary_columns = LogElement::getColumnsDescription(); + auto alias_columns = LogElement::getNamesAndAliases(); + ordinary_columns.setAliases(alias_columns); - if (const char * custom_column_list = LogElement::getCustomColumnList()) - { - ParserColumnDeclarationList parser; - const Settings & settings = getContext()->getSettingsRef(); - - ASTPtr columns_list_raw = parseQuery(parser, custom_column_list, "columns declaration list", settings.max_query_size, settings.max_parser_depth); - new_columns_list->set(new_columns_list->columns, columns_list_raw); - } - else - { - auto ordinary_columns = LogElement::getNamesAndTypes(); - auto alias_columns = LogElement::getNamesAndAliases(); - - new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns)); - } + new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns)); create->set(create->columns_list, new_columns_list); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index d322af4329c..8c357e43be9 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -25,9 +25,9 @@ namespace DB /// fields static std::string name(); - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); + /// TODO: Remove this method, we can return aliases directly from getColumnsDescription().
static NamesAndAliases getNamesAndAliases(); - static const char * getCustomColumnList(); void appendToBlock(MutableColumns & columns) const; }; */ diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 5f3492f0871..efe3fd7f740 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -376,7 +376,8 @@ void TableJoin::addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, boo * For `JOIN ON expr1 == expr2` we will infer common type later in makeTableJoin, * when part of plan built and types of expression will be known. */ - inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage()); + bool require_strict_keys_match = isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE); + inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage(), require_strict_keys_match); if (auto it = left_type_map.find(col.name); it != left_type_map.end()) { @@ -560,7 +561,9 @@ TableJoin::createConvertingActions( NameToNameMap left_column_rename; NameToNameMap right_column_rename; - inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage()); + /// FullSortingMerge join algorithm doesn't support joining keys with different types (e.g. String and Nullable(String)) + bool require_strict_keys_match = isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE); + inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage(), require_strict_keys_match); if (!left_type_map.empty() || !right_type_map.empty()) { left_dag = applyKeyConvertToTable(left_sample_columns, left_type_map, JoinTableSide::Left, left_column_rename); @@ -614,11 +617,8 @@ TableJoin::createConvertingActions( } template -void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right) +void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool require_strict_keys_match) { - /// FullSortingMerge and PartialMerge join algorithms don't support joining keys with different types - /// (e.g. String and LowCardinality(String)) - bool require_strict_keys_match = isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE); if (!left_type_map.empty() || !right_type_map.empty()) return; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 247835d9c53..75e2342d1e9 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -218,7 +218,7 @@ private: /// Calculates common supertypes for corresponding join key columns. 
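    /// Illustrative example (types invented for the sake of the comment): for `t1 JOIN t2 ON t1.key = t2.key`
    /// with `t1.key :: UInt16` and `t2.key :: Int32`, the common supertype Int32 is inferred and converting
    /// actions are added on both sides. When require_strict_keys_match is set (full sorting merge join),
    /// a conversion is also requested for pairs such as String vs. LowCardinality(String), which other join
    /// algorithms can consume without any cast.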
template - void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right); + void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool require_strict_keys_match); void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 2ea9b805a45..d6971bbac54 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -16,7 +16,7 @@ namespace DB { -NamesAndTypesList TextLogElement::getNamesAndTypes() +ColumnsDescription TextLogElement::getColumnsDescription() { auto priority_datatype = std::make_shared( DataTypeEnum8::Values @@ -32,27 +32,27 @@ NamesAndTypesList TextLogElement::getNamesAndTypes() {"Test", static_cast(Message::PRIO_TEST)}, }); - return + return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(), "Time of the entry."}, + {"event_time_microseconds", std::make_shared(6), "Time of the entry with microseconds precision."}, - {"thread_name", std::make_shared(std::make_shared())}, - {"thread_id", std::make_shared()}, + {"thread_name", std::make_shared(std::make_shared()), "Name of the thread from which the logging was done."}, + {"thread_id", std::make_shared(), "OS thread ID."}, - {"level", std::move(priority_datatype)}, - {"query_id", std::make_shared()}, - {"logger_name", std::make_shared(std::make_shared())}, - {"message", std::make_shared()}, + {"level", std::move(priority_datatype), "Entry level. Possible values: 1 or 'Fatal', 2 or 'Critical', 3 or 'Error', 4 or 'Warning', 5 or 'Notice', 6 or 'Information', 7 or 'Debug', 8 or 'Trace'."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"logger_name", std::make_shared(std::make_shared()), "Name of the logger (i.e. 
DDLWorker)."}, + {"message", std::make_shared(), "The message itself."}, - {"revision", std::make_shared()}, + {"revision", std::make_shared(), "ClickHouse revision."}, - {"source_file", std::make_shared(std::make_shared())}, - {"source_line", std::make_shared()}, + {"source_file", std::make_shared(std::make_shared()), "Source file from which the logging was done."}, + {"source_line", std::make_shared(), "Source line from which the logging was done."}, - {"message_format_string", std::make_shared(std::make_shared())}, + {"message_format_string", std::make_shared(std::make_shared()), "A format string that was used to format the message."}, }; } diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index bfeca324fde..cdb4de76722 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -30,7 +31,7 @@ struct TextLogElement std::string_view message_format_string; static std::string name() { return "TextLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index d52c3493eaa..26adb0cfc3f 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -25,9 +25,9 @@ const TraceDataType::Values TraceLogElement::trace_values = {"ProfileEvent", static_cast(TraceType::ProfileEvent)}, }; -NamesAndTypesList TraceLogElement::getNamesAndTypes() +ColumnsDescription TraceLogElement::getColumnsDescription() { - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 71aec0b50c4..f4cd29a7a2d 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -37,7 +38,7 @@ struct TraceLogElement ProfileEvents::Count increment{}; static std::string name() { return "TraceLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index 18a8b099ba4..e893be814ca 100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -18,7 +18,7 @@ namespace DB { -NamesAndTypesList TransactionsInfoLogElement::getNamesAndTypes() +ColumnsDescription TransactionsInfoLogElement::getColumnsDescription() { auto type_enum = std::make_shared( DataTypeEnum8::Values @@ -32,7 +32,7 @@ NamesAndTypesList TransactionsInfoLogElement::getNamesAndTypes() {"UnlockPart", static_cast(UNLOCK_PART)}, }); - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"type", std::move(type_enum)}, diff --git a/src/Interpreters/TransactionsInfoLog.h b/src/Interpreters/TransactionsInfoLog.h index fc3783b5916..0a607704e74 100644 --- a/src/Interpreters/TransactionsInfoLog.h +++ b/src/Interpreters/TransactionsInfoLog.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -39,7 +40,7 @@ struct TransactionsInfoLogElement 
String part_name; static std::string name() { return "TransactionsInfoLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 729e2ed6007..b740852b808 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -171,16 +171,13 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) /// copy shared pointer to args in order to ensure lifetime auto args_ast = function->arguments; - - /** remove function call and take a step back to ensure - * next iteration does not skip not yet processed data - */ - remove_expr_at_index(i); - - /// copy non-literal arguments + /// Replace function call in 'group_exprs' with non-literal arguments. + const auto & erase_position = group_exprs.begin() + i; + group_exprs.erase(erase_position); + const auto & insert_position = group_exprs.begin() + i; std::remove_copy_if( std::begin(args_ast->children), std::end(args_ast->children), - std::back_inserter(group_exprs), is_literal + std::inserter(group_exprs, insert_position), is_literal ); } else if (is_literal(group_exprs[i])) diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index b55a9f540c5..9cc31edfe56 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -57,7 +57,7 @@ DataTypePtr getCoordinationErrorCodesEnumType() }); } -NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() +ColumnsDescription ZooKeeperLogElement::getColumnsDescription() { auto type_enum = std::make_shared( DataTypeEnum8::Values @@ -120,7 +120,7 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"NOTCONNECTED", static_cast(Coordination::State::NOTCONNECTED)}, }); - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"type", std::move(type_enum)}, diff --git a/src/Interpreters/ZooKeeperLog.h b/src/Interpreters/ZooKeeperLog.h index d79b75ec85f..90d36d22a59 100644 --- a/src/Interpreters/ZooKeeperLog.h +++ b/src/Interpreters/ZooKeeperLog.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -68,7 +69,7 @@ struct ZooKeeperLogElement static std::string name() { return "ZooKeeperLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 4e38103ac1f..c3b8405659a 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -492,10 +492,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (src.getType() == Field::Types::String) { /// Promote data type to avoid overflows. Note that overflows in the largest data type are still possible. 
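An illustrative aside on the Float32 exception introduced just below: if a string literal destined for a Float32 column were parsed through the promoted Float64 type, the parsed value would no longer compare equal to what the column actually stores. A minimal sketch of the effect, using only Python's standard struct module and 0.1 as an arbitrary example value:

import struct

# Round-trip 0.1 through IEEE-754 single precision, i.e. what a Float32 column stores.
as_float32 = struct.unpack('f', struct.pack('f', 0.1))[0]

print(as_float32)         # 0.10000000149011612
print(as_float32 == 0.1)  # False: the double-precision literal 0.1 is a different value

Parsing the literal directly with the target Float32 type keeps both sides bit-identical, which is why the promotion is skipped for Float32.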
+ /// But don't promote Float32, since we want to keep the exact same value const IDataType * type_to_parse = &type; DataTypePtr holder; - if (type.canBePromoted()) + if (type.canBePromoted() && !which_type.isFloat32()) { holder = type.promoteNumericType(); type_to_parse = holder.get(); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 63804d2d86f..4b5a6a84e17 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1010,7 +1010,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getUserName()); + QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1043,7 +1043,7 @@ static std::tuple executeQueryImpl( } } - interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); + interpreter = InterpreterFactory::instance().get(ast, context, SelectQueryOptions(stage).setInternal(internal)); const auto & query_settings = context->getSettingsRef(); if (context->getCurrentTransaction() && query_settings.throw_on_unsupported_query_inside_transaction) @@ -1123,7 +1123,8 @@ static std::tuple executeQueryImpl( { QueryCache::Key key( ast, res.pipeline.getHeader(), - context->getUserName(), settings.query_cache_share_between_users, + context->getUserID(), context->getCurrentRoles(), + settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), settings.query_cache_compress_entries); @@ -1434,11 +1435,12 @@ void executeQuery( const auto & compression_method_node = ast_query_with_output->compression->as(); compression_method = compression_method_node.value.safeGet(); } - + const auto & settings = context->getSettingsRef(); compressed_buffer = wrapWriteBufferWithCompressionMethod( std::make_unique(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT), chooseCompressionMethod(out_file, compression_method), - /* compression level = */ 3 + /* compression level = */ static_cast(settings.output_format_compression_level), + /* zstd_window_log = */ static_cast(settings.output_format_compression_zstd_window_log) ); } diff --git a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index fd023754abf..6f84a60f2af 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "Processors/Executors/PullingPipelineExecutor.h" #include @@ -29,6 +30,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) context->makeGlobalContext(); context->setApplicationType(Context::ApplicationType::LOCAL); + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 2e9ee0af724..1295a4d5a75 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -20,12 +20,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } -bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context) -{ - return settings.max_parallel_replicas > 1 && 
context.getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; -} - ASTPtr getCustomKeyFilterForParallelReplica( size_t replicas_count, size_t replica_num, @@ -34,7 +28,7 @@ ASTPtr getCustomKeyFilterForParallelReplica( const ColumnsDescription & columns, const ContextPtr & context) { - assert(replicas_count > 1); + chassert(replicas_count > 1); if (filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) { // first we do modulo with replica count diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h index c35f00f3dfd..1506c1992c0 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -9,9 +9,6 @@ namespace DB { - -bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context); - /// Get AST for filter created from custom_key /// replica_num is the number of the replica for which we are generating filter starting from 0 ASTPtr getCustomKeyFilterForParallelReplica( diff --git a/src/Interpreters/registerInterpreters.cpp b/src/Interpreters/registerInterpreters.cpp new file mode 100644 index 00000000000..481d0597a85 --- /dev/null +++ b/src/Interpreters/registerInterpreters.cpp @@ -0,0 +1,120 @@ +#include + +namespace DB +{ + +void registerInterpreterSelectQuery(InterpreterFactory & factory); +void registerInterpreterSelectQueryAnalyzer(InterpreterFactory & factory); +void registerInterpreterSelectWithUnionQuery(InterpreterFactory & factory); +void registerInterpreterSelectIntersectExceptQuery(InterpreterFactory & factory); +void registerInterpreterInsertQuery(InterpreterFactory & factory); +void registerInterpreterCreateQuery(InterpreterFactory & factory); +void registerInterpreterDropQuery(InterpreterFactory & factory); +void registerInterpreterUndropQuery(InterpreterFactory & factory); +void registerInterpreterRenameQuery(InterpreterFactory & factory); +void registerInterpreterShowTablesQuery(InterpreterFactory & factory); +void registerInterpreterShowColumnsQuery(InterpreterFactory & factory); +void registerInterpreterShowIndexesQuery(InterpreterFactory & factory); +void registerInterpreterShowSettingQuery(InterpreterFactory & factory); +void registerInterpreterShowEnginesQuery(InterpreterFactory & factory); +void registerInterpreterShowFunctionsQuery(InterpreterFactory & factory); +void registerInterpreterUseQuery(InterpreterFactory & factory); +void registerInterpreterSetQuery(InterpreterFactory & factory); +void registerInterpreterSetRoleQuery(InterpreterFactory & factory); +void registerInterpreterOptimizeQuery(InterpreterFactory & factory); +void registerInterpreterExistsQuery(InterpreterFactory & factory); +void registerInterpreterShowCreateQuery(InterpreterFactory & factory); +void registerInterpreterDescribeQuery(InterpreterFactory & factory); +void registerInterpreterDescribeCacheQuery(InterpreterFactory & factory); +void registerInterpreterExplainQuery(InterpreterFactory & factory); +void registerInterpreterShowProcesslistQuery(InterpreterFactory & factory); +void registerInterpreterAlterQuery(InterpreterFactory & factory); +void registerInterpreterAlterNamedCollectionQuery(InterpreterFactory & factory); +void registerInterpreterCheckQuery(InterpreterFactory & factory); +void registerInterpreterKillQueryQuery(InterpreterFactory & factory); +void 
registerInterpreterSystemQuery(InterpreterFactory & factory); +void registerInterpreterWatchQuery(InterpreterFactory & factory); +void registerInterpreterCreateUserQuery(InterpreterFactory & factory); +void registerInterpreterCreateRoleQuery(InterpreterFactory & factory); +void registerInterpreterCreateQuotaQuery(InterpreterFactory & factory); +void registerInterpreterCreateRowPolicyQuery(InterpreterFactory & factory); +void registerInterpreterCreateSettingsProfileQuery(InterpreterFactory & factory); +void registerInterpreterDropAccessEntityQuery(InterpreterFactory & factory); +void registerInterpreterMoveAccessEntityQuery(InterpreterFactory & factory); +void registerInterpreterDropNamedCollectionQuery(InterpreterFactory & factory); +void registerInterpreterGrantQuery(InterpreterFactory & factory); +void registerInterpreterShowCreateAccessEntityQuery(InterpreterFactory & factory); +void registerInterpreterShowGrantsQuery(InterpreterFactory & factory); +void registerInterpreterShowAccessEntitiesQuery(InterpreterFactory & factory); +void registerInterpreterShowAccessQuery(InterpreterFactory & factory); +void registerInterpreterShowPrivilegesQuery(InterpreterFactory & factory); +void registerInterpreterExternalDDLQuery(InterpreterFactory & factory); +void registerInterpreterTransactionControlQuery(InterpreterFactory & factory); +void registerInterpreterCreateFunctionQuery(InterpreterFactory & factory); +void registerInterpreterDropFunctionQuery(InterpreterFactory & factory); +void registerInterpreterCreateIndexQuery(InterpreterFactory & factory); +void registerInterpreterCreateNamedCollectionQuery(InterpreterFactory & factory); +void registerInterpreterDropIndexQuery(InterpreterFactory & factory); +void registerInterpreterBackupQuery(InterpreterFactory & factory); +void registerInterpreterDeleteQuery(InterpreterFactory & factory); + +void registerInterpreters() +{ + auto & factory = InterpreterFactory::instance(); + + registerInterpreterSelectQuery(factory); + registerInterpreterSelectQueryAnalyzer(factory); + registerInterpreterSelectWithUnionQuery(factory); + registerInterpreterSelectIntersectExceptQuery(factory); + registerInterpreterInsertQuery(factory); + registerInterpreterCreateQuery(factory); + registerInterpreterDropQuery(factory); + registerInterpreterUndropQuery(factory); + registerInterpreterRenameQuery(factory); + registerInterpreterShowTablesQuery(factory); + registerInterpreterShowColumnsQuery(factory); + registerInterpreterShowIndexesQuery(factory); + registerInterpreterShowSettingQuery(factory); + registerInterpreterShowEnginesQuery(factory); + registerInterpreterShowFunctionsQuery(factory); + registerInterpreterUseQuery(factory); + registerInterpreterSetQuery(factory); + registerInterpreterSetRoleQuery(factory); + registerInterpreterOptimizeQuery(factory); + registerInterpreterExistsQuery(factory); + registerInterpreterShowCreateQuery(factory); + registerInterpreterDescribeQuery(factory); + registerInterpreterDescribeCacheQuery(factory); + registerInterpreterExplainQuery(factory); + registerInterpreterShowProcesslistQuery(factory); + registerInterpreterAlterQuery(factory); + registerInterpreterAlterNamedCollectionQuery(factory); + registerInterpreterCheckQuery(factory); + registerInterpreterKillQueryQuery(factory); + registerInterpreterSystemQuery(factory); + registerInterpreterWatchQuery(factory); + registerInterpreterCreateUserQuery(factory); + registerInterpreterCreateRoleQuery(factory); + registerInterpreterCreateQuotaQuery(factory); + 
registerInterpreterCreateRowPolicyQuery(factory); + registerInterpreterCreateSettingsProfileQuery(factory); + registerInterpreterDropAccessEntityQuery(factory); + registerInterpreterMoveAccessEntityQuery(factory); + registerInterpreterDropNamedCollectionQuery(factory); + registerInterpreterGrantQuery(factory); + registerInterpreterShowCreateAccessEntityQuery(factory); + registerInterpreterShowGrantsQuery(factory); + registerInterpreterShowAccessEntitiesQuery(factory); + registerInterpreterShowAccessQuery(factory); + registerInterpreterShowPrivilegesQuery(factory); + registerInterpreterExternalDDLQuery(factory); + registerInterpreterTransactionControlQuery(factory); + registerInterpreterCreateFunctionQuery(factory); + registerInterpreterDropFunctionQuery(factory); + registerInterpreterCreateIndexQuery(factory); + registerInterpreterCreateNamedCollectionQuery(factory); + registerInterpreterDropIndexQuery(factory); + registerInterpreterBackupQuery(factory); + registerInterpreterDeleteQuery(factory); +} +} diff --git a/src/Interpreters/registerInterpreters.h b/src/Interpreters/registerInterpreters.h new file mode 100644 index 00000000000..9f0c3bbec22 --- /dev/null +++ b/src/Interpreters/registerInterpreters.h @@ -0,0 +1,6 @@ +#pragma once + +namespace DB +{ +void registerInterpreters(); +} diff --git a/src/Interpreters/tests/gtest_convertFieldToType.cpp b/src/Interpreters/tests/gtest_convertFieldToType.cpp index cda9311dcbe..ea1c5c43a25 100644 --- a/src/Interpreters/tests/gtest_convertFieldToType.cpp +++ b/src/Interpreters/tests/gtest_convertFieldToType.cpp @@ -74,9 +74,9 @@ INSTANTIATE_TEST_SUITE_P( // Max value of Date { "Date", - Field(std::numeric_limits::max()), + Field(std::numeric_limits::max()), "DateTime64(0, 'UTC')", - DecimalField(DateTime64(std::numeric_limits::max() * Day), 0) + DecimalField(DateTime64(std::numeric_limits::max() * Day), 0) }, // check that scale is respected { diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 84355817b2c..e229095df1b 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -22,60 +22,47 @@ ASTPtr ASTAlterCommand::clone() const res->children.clear(); if (col_decl) - { - res->col_decl = col_decl->clone(); - res->children.push_back(res->col_decl); - } + res->col_decl = res->children.emplace_back(col_decl->clone()).get(); if (column) - { - res->column = column->clone(); - res->children.push_back(res->column); - } + res->column = res->children.emplace_back(column->clone()).get(); if (order_by) - { - res->order_by = order_by->clone(); - res->children.push_back(res->order_by); - } + res->order_by = res->children.emplace_back(order_by->clone()).get(); + if (sample_by) + res->sample_by = res->children.emplace_back(sample_by->clone()).get(); + if (index_decl) + res->index_decl = res->children.emplace_back(index_decl->clone()).get(); + if (index) + res->index = res->children.emplace_back(index->clone()).get(); + if (constraint_decl) + res->constraint_decl = res->children.emplace_back(constraint_decl->clone()).get(); + if (constraint) + res->constraint = res->children.emplace_back(constraint->clone()).get(); + if (projection_decl) + res->projection_decl = res->children.emplace_back(projection_decl->clone()).get(); + if (projection) + res->projection = res->children.emplace_back(projection->clone()).get(); + if (statistic_decl) + res->statistic_decl = res->children.emplace_back(statistic_decl->clone()).get(); if (partition) - { - res->partition = partition->clone(); - 
res->children.push_back(res->partition); - } + res->partition = res->children.emplace_back(partition->clone()).get(); if (predicate) - { - res->predicate = predicate->clone(); - res->children.push_back(res->predicate); - } - if (ttl) - { - res->ttl = ttl->clone(); - res->children.push_back(res->ttl); - } - if (settings_changes) - { - res->settings_changes = settings_changes->clone(); - res->children.push_back(res->settings_changes); - } - if (settings_resets) - { - res->settings_resets = settings_resets->clone(); - res->children.push_back(res->settings_resets); - } - if (values) - { - res->values = values->clone(); - res->children.push_back(res->values); - } - if (rename_to) - { - res->rename_to = rename_to->clone(); - res->children.push_back(res->rename_to); - } + res->predicate = res->children.emplace_back(predicate->clone()).get(); + if (update_assignments) + res->update_assignments = res->children.emplace_back(update_assignments->clone()).get(); if (comment) - { - res->comment = comment->clone(); - res->children.push_back(res->comment); - } + res->comment = res->children.emplace_back(comment->clone()).get(); + if (ttl) + res->ttl = res->children.emplace_back(ttl->clone()).get(); + if (settings_changes) + res->settings_changes = res->children.emplace_back(settings_changes->clone()).get(); + if (settings_resets) + res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); + if (select) + res->select = res->children.emplace_back(select->clone()).get(); + if (values) + res->values = res->children.emplace_back(values->clone()).get(); + if (rename_to) + res->rename_to = res->children.emplace_back(rename_to->clone()).get(); return res; } @@ -486,6 +473,32 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected type of ALTER"); } +void ASTAlterCommand::forEachPointerToChild(std::function f) +{ + f(reinterpret_cast(&col_decl)); + f(reinterpret_cast(&column)); + f(reinterpret_cast(&order_by)); + f(reinterpret_cast(&sample_by)); + f(reinterpret_cast(&index_decl)); + f(reinterpret_cast(&index)); + f(reinterpret_cast(&constraint_decl)); + f(reinterpret_cast(&constraint)); + f(reinterpret_cast(&projection_decl)); + f(reinterpret_cast(&projection)); + f(reinterpret_cast(&statistic_decl)); + f(reinterpret_cast(&partition)); + f(reinterpret_cast(&predicate)); + f(reinterpret_cast(&update_assignments)); + f(reinterpret_cast(&comment)); + f(reinterpret_cast(&ttl)); + f(reinterpret_cast(&settings_changes)); + f(reinterpret_cast(&settings_resets)); + f(reinterpret_cast(&select)); + f(reinterpret_cast(&values)); + f(reinterpret_cast(&rename_to)); +} + + bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const { if (command_list) @@ -624,4 +637,11 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState } } +void ASTAlterQuery::forEachPointerToChild(std::function f) +{ + for (const auto & child : command_list->children) + child->as().forEachPointerToChild(f); + f(reinterpret_cast(&command_list)); +} + } diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 0b115537a6d..c17f260b660 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -23,6 +23,8 @@ namespace DB class ASTAlterCommand : public IAST { + friend class ASTAlterQuery; + public: enum Type { @@ -89,83 +91,85 @@ public: * This field is not used in the DROP query * In MODIFY query, the column name and the new type are stored here */ - 
ASTPtr col_decl; + IAST * col_decl = nullptr; /** The ADD COLUMN and MODIFY COLUMN query here optionally stores the name of the column following AFTER * The DROP query stores the column name for deletion here * Also used for RENAME COLUMN. */ - ASTPtr column; + IAST * column = nullptr; /** For MODIFY ORDER BY */ - ASTPtr order_by; + IAST * order_by = nullptr; /** For MODIFY SAMPLE BY */ - ASTPtr sample_by; + IAST * sample_by = nullptr; /** The ADD INDEX query stores the IndexDeclaration there. */ - ASTPtr index_decl; + IAST * index_decl = nullptr; /** The ADD INDEX query stores the name of the index following AFTER. * The DROP INDEX query stores the name for deletion. * The MATERIALIZE INDEX query stores the name of the index to materialize. * The CLEAR INDEX query stores the name of the index to clear. */ - ASTPtr index; + IAST * index = nullptr; /** The ADD CONSTRAINT query stores the ConstraintDeclaration there. */ - ASTPtr constraint_decl; + IAST * constraint_decl = nullptr; /** The DROP CONSTRAINT query stores the name for deletion. */ - ASTPtr constraint; + IAST * constraint = nullptr; /** The ADD PROJECTION query stores the ProjectionDeclaration there. */ - ASTPtr projection_decl; + IAST * projection_decl = nullptr; /** The ADD PROJECTION query stores the name of the projection following AFTER. * The DROP PROJECTION query stores the name for deletion. * The MATERIALIZE PROJECTION query stores the name of the projection to materialize. * The CLEAR PROJECTION query stores the name of the projection to clear. */ - ASTPtr projection; + IAST * projection = nullptr; - ASTPtr statistic_decl; + IAST * statistic_decl = nullptr; /** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries. * The value or ID of the partition is stored here. */ - ASTPtr partition; + IAST * partition = nullptr; /// For DELETE/UPDATE WHERE: the predicate that filters the rows to delete/update. - ASTPtr predicate; + IAST * predicate = nullptr; /// A list of expressions of the form `column = expr` for the UPDATE command. 
- ASTPtr update_assignments; + IAST * update_assignments = nullptr; /// A column comment - ASTPtr comment; + IAST * comment = nullptr; /// For MODIFY TTL query - ASTPtr ttl; + IAST * ttl = nullptr; /// FOR MODIFY_SETTING - ASTPtr settings_changes; + IAST * settings_changes = nullptr; /// FOR RESET_SETTING - ASTPtr settings_resets; + IAST * settings_resets = nullptr; /// For MODIFY_QUERY - ASTPtr select; + IAST * select = nullptr; - /** In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here - */ - ASTPtr values; + /// In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the list of live views is stored here + IAST * values = nullptr; + + /// Target column name + IAST * rename_to = nullptr; /// For MODIFY REFRESH ASTPtr refresh; @@ -211,9 +215,6 @@ public: String to_database; String to_table; - /// Target column name - ASTPtr rename_to; - /// Which property user want to remove String remove_property; @@ -223,6 +224,8 @@ public: protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override; }; class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster @@ -270,10 +273,7 @@ protected: bool isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const; - void forEachPointerToChild(std::function f) override - { - f(reinterpret_cast(&command_list)); - } + void forEachPointerToChild(std::function f) override; }; } diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index 30b172ecbb8..c2854e2235c 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -4,17 +4,9 @@ #include #include #include +#include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 6976683678e..34a1ae6e8e0 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -5,17 +5,10 @@ #include #include #include +#include #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif namespace DB { diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 17d4b9a9d58..4e03dad5b5b 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -64,11 +64,13 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma ASTPtr ASTCreateIndexQuery::convertToASTAlterCommand() const { auto command = std::make_shared(); + command->type = ASTAlterCommand::ADD_INDEX; - command->index = index_name->clone(); - command->index_decl = index_decl->clone(); command->if_not_exists = if_not_exists; + command->index = command->children.emplace_back(index_name).get(); + command->index_decl = command->children.emplace_back(index_decl).get(); + return command; } diff --git a/src/Parsers/ASTDropIndexQuery.cpp b/src/Parsers/ASTDropIndexQuery.cpp index 43de582ba8a..6866770d2ab 100644 --- a/src/Parsers/ASTDropIndexQuery.cpp +++ b/src/Parsers/ASTDropIndexQuery.cpp @@ -55,10 +55,12 @@ void ASTDropIndexQuery::formatQueryImpl(const FormatSettings & settings, 
FormatS ASTPtr ASTDropIndexQuery::convertToASTAlterCommand() const { auto command = std::make_shared(); + command->type = ASTAlterCommand::DROP_INDEX; - command->index = index_name->clone(); command->if_exists = if_exists; + command->index = command->children.emplace_back(index_name).get(); + return command; } diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 90d9ede7337..4bb1d2eef30 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -143,10 +143,19 @@ ASTPtr ASTProjectionSelectQuery::cloneToASTSelect() const if (groupBy()) select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, groupBy()->clone()); + /// Attach settings to prevent AST transformations. We already have ignored AST optimizations + /// for projection queries. Only remaining settings need to be added here. + /// + /// NOTE: `count_distinct_implementation` has already been selected during the creation of the + /// projection, so there will be no countDistinct(...) to rewrite in projection queries. + /// Ideally, we should aim for a unique and normalized query representation that remains + /// unchanged after the AST rewrite. For instance, we can add -OrEmpty, realIn as the default + /// behavior w.r.t -OrNull, nullIn. auto settings_query = std::make_shared(); SettingsChanges settings_changes; - settings_changes.insertSetting("optimize_aggregators_of_group_by_keys", false); - settings_changes.insertSetting("optimize_group_by_function_keys", false); + settings_changes.insertSetting("aggregate_functions_null_for_empty", false); + settings_changes.insertSetting("transform_null_in", false); + settings_changes.insertSetting("legacy_column_name_of_tuple_literal", false); settings_query->changes = std::move(settings_changes); settings_query->is_standalone = false; select_query->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(settings_query)); diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index c005d49a93d..fc4ecf4763a 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -207,6 +207,23 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { settings.ostr << ' '; print_keyword(magic_enum::enum_name(sync_replica_mode)); + + // If the mode is LIGHTWEIGHT and specific source replicas are specified + if (sync_replica_mode == SyncReplicaMode::LIGHTWEIGHT && !src_replicas.empty()) + { + settings.ostr << ' '; + print_keyword("FROM"); + settings.ostr << ' '; + + for (auto it = src_replicas.begin(); it != src_replicas.end(); ++it) + { + print_identifier(*it); + + // Add a comma and space after each identifier, except the last one + if (std::next(it) != src_replicas.end()) + settings.ostr << ", "; + } + } } } else if (type == Type::SYNC_DATABASE_REPLICA) diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index fc26f5dee1c..027bb1ec56f 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -46,6 +46,12 @@ public: WAIT_LOADING_PARTS, DROP_REPLICA, DROP_DATABASE_REPLICA, +#if USE_JEMALLOC + JEMALLOC_PURGE, + JEMALLOC_ENABLE_PROFILE, + JEMALLOC_DISABLE_PROFILE, + JEMALLOC_FLUSH_PROFILE, +#endif SYNC_REPLICA, SYNC_DATABASE_REPLICA, SYNC_TRANSACTION_LOG, @@ -138,6 +144,8 @@ public: SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT; + std::unordered_set src_replicas; + ServerType server_type; /// For SYSTEM TEST VIEW (SET FAKE TIME
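For the src_replicas field and the FROM clause that formatImpl now prints for LIGHTWEIGHT syncs, a hedged usage sketch in the style of the integration tests later in this patch; the table name, replica names, and the exact quoting accepted by the parser are assumptions for illustration only:

# Hypothetical call on a test-cluster node object ('node'), restricting a lightweight
# sync to two named source replicas instead of waiting on every replica in the set.
node.query(
    "SYSTEM SYNC REPLICA test_db.replicated_table LIGHTWEIGHT FROM 'replica1', 'replica2'"
)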

cached_s3
+ + + + + + s3 + + cached_s3 + diff --git a/tests/config/config.d/storage_conf_02963.xml b/tests/config/config.d/storage_conf_02963.xml new file mode 100644 index 00000000000..0672965e99d --- /dev/null +++ b/tests/config/config.d/storage_conf_02963.xml @@ -0,0 +1,15 @@ + + + + + object_storage + s3 + s3_disk/ + http://localhost:11111/test/common/ + clickhouse + clickhouse + 20000 + + + + diff --git a/tests/config/config.d/storage_metadata_with_full_object_key.xml b/tests/config/config.d/storage_metadata_with_full_object_key.xml new file mode 100644 index 00000000000..2bb8d49ec4b --- /dev/null +++ b/tests/config/config.d/storage_metadata_with_full_object_key.xml @@ -0,0 +1,5 @@ + + + + 1 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 2f9fd44c9b0..a68a4c19501 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -141,7 +141,26 @@ if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; th fi if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then - ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ + object_key_types_options=("generate-suffix" "generate-full-key" "generate-template-key") + object_key_type="${object_key_types_options[0]}" + + if [[ -n "$RANDOMIZE_OBJECT_KEY_TYPE" ]] && [[ "$RANDOMIZE_OBJECT_KEY_TYPE" -eq 1 ]]; then + object_key_type="${object_key_types_options[$(($RANDOM % ${#object_key_types_options[@]}))]}" + fi + + case $object_key_type in + "generate-full-key") + ln -sf $SRC_PATH/config.d/storage_metadata_with_full_object_key.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ + ;; + "generate-template-key") + ln -sf $SRC_PATH/config.d/storage_metadata_with_full_object_key.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/s3_storage_policy_with_template_object_key.xml $DEST_SERVER_PATH/config.d/s3_storage_policy_by_default.xml + ;; + "generate-suffix"|*) + ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ + ;; + esac fi ARM="aarch64" @@ -157,6 +176,7 @@ if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/storage_conf_02963.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/s3_cache_new.xml $DEST_SERVER_PATH/users.d/ fi diff --git a/tests/integration/test_catboost_evaluate/config/logger_library_bridge.xml b/tests/integration/test_catboost_evaluate/config/logger_library_bridge.xml new file mode 100644 index 00000000000..64739e3c7aa --- /dev/null +++ b/tests/integration/test_catboost_evaluate/config/logger_library_bridge.xml @@ -0,0 +1,6 @@ + + + /var/log/clickhouse-server/clickhouse-library-bridge.log + trace + + diff --git a/tests/integration/test_catboost_evaluate/test.py b/tests/integration/test_catboost_evaluate/test.py index 7412d34dd40..bf4f9f85cac 100644 --- a/tests/integration/test_catboost_evaluate/test.py +++ b/tests/integration/test_catboost_evaluate/test.py @@ -12,7 +12,9 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", stay_alive=True, main_configs=["config/models_config.xml"] + "instance", + stay_alive=True, + main_configs=["config/models_config.xml", "config/logger_library_bridge.xml"], 
) diff --git a/tests/integration/test_cluster_discovery/common.py b/tests/integration/test_cluster_discovery/common.py new file mode 100644 index 00000000000..c66e9361973 --- /dev/null +++ b/tests/integration/test_cluster_discovery/common.py @@ -0,0 +1,41 @@ +import time + + +def check_on_cluster( + nodes, + expected, + *, + what, + cluster_name="test_auto_cluster", + msg=None, + retries=5, + query_params={}, +): + """ + Select data from `system.clusters` on specified nodes and check the result + """ + assert 1 <= retries <= 6 + + node_results = {} + for retry in range(1, retries + 1): + for node in nodes: + if node_results.get(node.name) == expected: + # do not retry node after success + continue + query_text = ( + f"SELECT {what} FROM system.clusters WHERE cluster = '{cluster_name}'" + ) + node_results[node.name] = int(node.query(query_text, **query_params)) + + if all(actual == expected for actual in node_results.values()): + break + + print(f"Retry {retry}/{retries} unsuccessful, result: {node_results}") + + if retry != retries: + time.sleep(2**retry) + else: + msg = msg or f"Wrong '{what}' result" + raise Exception( + f"{msg}: {node_results}, expected: {expected} (after {retries} retries)" + ) diff --git a/tests/integration/test_cluster_discovery/config/config_with_pwd.xml b/tests/integration/test_cluster_discovery/config/config_with_pwd.xml new file mode 100644 index 00000000000..8a2f138bccb --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_with_pwd.xml @@ -0,0 +1,21 @@ + + 1 + + + + + /clickhouse/discovery/test_auto_cluster_with_pwd + user1 + password123 + + + + + /clickhouse/discovery/test_auto_cluster_with_wrong_pwd + user1 + wrongpass1234 + + + + + diff --git a/tests/integration/test_cluster_discovery/config/config_with_secret1.xml b/tests/integration/test_cluster_discovery/config/config_with_secret1.xml new file mode 100644 index 00000000000..d91c7d448eb --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_with_secret1.xml @@ -0,0 +1,22 @@ + + 1 + + + + + + /clickhouse/discovery/test_auto_cluster_with_secret + secret123 + + + + + + /clickhouse/discovery/test_auto_cluster_with_wrong_secret + correctsecret321 + + + + + + diff --git a/tests/integration/test_cluster_discovery/config/config_with_secret2.xml b/tests/integration/test_cluster_discovery/config/config_with_secret2.xml new file mode 100644 index 00000000000..6dfca01c940 --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_with_secret2.xml @@ -0,0 +1,22 @@ + + 1 + + + + + + /clickhouse/discovery/test_auto_cluster_with_secret + secret123 + + + + + + /clickhouse/discovery/test_auto_cluster_with_wrong_secret + wrongsecret333 + + + + + + diff --git a/tests/integration/test_cluster_discovery/config/users.d/users_with_pwd.xml b/tests/integration/test_cluster_discovery/config/users.d/users_with_pwd.xml new file mode 100644 index 00000000000..eaf33c8f70a --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/users.d/users_with_pwd.xml @@ -0,0 +1,17 @@ + + + + + + + + + passwordAbc + default + + + password123 + default + + + diff --git a/tests/integration/test_cluster_discovery/test.py b/tests/integration/test_cluster_discovery/test.py index ad3deb5b142..ab21c72cec4 100644 --- a/tests/integration/test_cluster_discovery/test.py +++ b/tests/integration/test_cluster_discovery/test.py @@ -1,7 +1,8 @@ import pytest import functools -import time +from .common import check_on_cluster + from helpers.cluster import ClickHouseCluster @@ -36,39 +37,6 @@ def 
start_cluster(): cluster.shutdown() -def check_on_cluster( - nodes, expected, *, what, cluster_name="test_auto_cluster", msg=None, retries=5 -): - """ - Select data from `system.clusters` on specified nodes and check the result - """ - assert 1 <= retries <= 6 - - node_results = {} - for retry in range(1, retries + 1): - for node in nodes: - if node_results.get(node.name) == expected: - # do not retry node after success - continue - query_text = ( - f"SELECT {what} FROM system.clusters WHERE cluster = '{cluster_name}'" - ) - node_results[node.name] = int(node.query(query_text)) - - if all(actual == expected for actual in node_results.values()): - break - - print(f"Retry {retry}/{retries} unsuccessful, result: {node_results}") - - if retry != retries: - time.sleep(2**retry) - else: - msg = msg or f"Wrong '{what}' result" - raise Exception( - f"{msg}: {node_results}, expected: {expected} (after {retries} retries)" - ) - - def test_cluster_discovery_startup_and_stop(start_cluster): """ Start cluster, check nodes count in system.clusters, diff --git a/tests/integration/test_cluster_discovery/test_password.py b/tests/integration/test_cluster_discovery/test_password.py new file mode 100644 index 00000000000..bf593260d6f --- /dev/null +++ b/tests/integration/test_cluster_discovery/test_password.py @@ -0,0 +1,72 @@ +import pytest + +from .common import check_on_cluster + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +nodes = { + "node0": cluster.add_instance( + "node0", + main_configs=["config/config_with_pwd.xml", "config/config_with_secret1.xml"], + user_configs=["config/users.d/users_with_pwd.xml"], + stay_alive=True, + with_zookeeper=True, + ), + "node1": cluster.add_instance( + "node1", + main_configs=["config/config_with_pwd.xml", "config/config_with_secret2.xml"], + user_configs=["config/users.d/users_with_pwd.xml"], + stay_alive=True, + with_zookeeper=True, + ), +} + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_connect_with_password(start_cluster): + check_on_cluster( + [nodes["node0"], nodes["node1"]], + len(nodes), + cluster_name="test_auto_cluster_with_pwd", + what="count()", + msg="Wrong nodes count in cluster", + query_params={"password": "passwordAbc"}, + ) + + result = nodes["node0"].query( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_pwd', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + assert result == "3\n3\n", result + + result = nodes["node0"].query_and_get_error( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_wrong_pwd', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + assert "Authentication failed" in result, result + + result = nodes["node0"].query( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_secret', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + assert result == "3\n3\n", result + + result = nodes["node0"].query_and_get_error( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_wrong_secret', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + + # With an incorrect secret, we don't get "Authentication failed", but the connection is simply dropped. + # So, we get messages like "Connection reset by peer" or "Attempt to read after eof". + # We only check that an error occurred and the message is not empty. 
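A brief usage sketch of the reworked check_on_cluster helper from common.py, which now forwards query_params to node.query so that password-protected auto-clusters can be polled; the node objects and password are the ones defined in this test file, but the standalone call below is only an illustration:

# Poll system.clusters on both nodes, authenticating with the configured password,
# until the password-protected auto-cluster reports the expected replica count.
check_on_cluster(
    [nodes["node0"], nodes["node1"]],
    expected=2,
    what="count()",
    cluster_name="test_auto_cluster_with_pwd",
    query_params={"password": "passwordAbc"},
)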
+ assert result diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 45ee876aa1d..ec34c181371 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -127,6 +127,50 @@ def check_secrets_for_tables(test_cases, password): ) +def test_backup_table(): + password = new_password() + + setup_queries = [ + "CREATE TABLE backup_test (x int) ENGINE = MergeTree ORDER BY x", + "INSERT INTO backup_test SELECT * FROM numbers(10)", + ] + + endpoints_with_credentials = [ + ( + f"S3('http://minio1:9001/root/data/backup_test_base', 'minio', '{password}')", + f"S3('http://minio1:9001/root/data/backup_test_incremental', 'minio', '{password}')", + ) + ] + + for query in setup_queries: + node.query_and_get_answer_with_error(query) + + # Actually need to make two backups to have base_backup + def make_test_case(endpoint_specs): + # Run ASYNC so it returns the backup id + return ( + f"BACKUP TABLE backup_test TO {endpoint_specs[0]} ASYNC", + f"BACKUP TABLE backup_test TO {endpoint_specs[1]} SETTINGS async=1, base_backup={endpoint_specs[0]}", + ) + + test_cases = [ + make_test_case(endpoint_spec) for endpoint_spec in endpoints_with_credentials + ] + for base_query, inc_query in test_cases: + node.query_and_get_answer_with_error(base_query)[0] + + inc_backup_query_output = node.query_and_get_answer_with_error(inc_query)[0] + inc_backup_id = TSV.toMat(inc_backup_query_output)[0][0] + names_in_system_backups_output, _ = node.query_and_get_answer_with_error( + f"SELECT base_backup_name, name FROM system.backups where id = '{inc_backup_id}'" + ) + + base_backup_name, name = TSV.toMat(names_in_system_backups_output)[0] + + assert password not in base_backup_name + assert password not in name + + def test_create_table(): password = new_password() diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 286a2d29541..97c8b65f15d 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -2714,3 +2714,698 @@ def table_with_indexes(clickhouse_node, mysql_node, service_name): mysql_node.query(f"DROP DATABASE IF EXISTS {db}") clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + + +def binlog_client_test(clickhouse_node, mysql_node, replication): + db = "binlog_client_test" + replication.create_db_mysql(db) + + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 100000, column="score") + replication.create_db_ch(f"{db}1", from_mysql_db=db, settings="use_binlog_client=1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}1 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + + num_rows = replication.inserted_rows + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + replication.create_db_ch(f"{db}2", from_mysql_db=db, settings="use_binlog_client=1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}2 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + num_rows = replication.inserted_rows + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() 
FROM system.mysql_binlogs WHERE name = '{db}1'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() FROM system.mysql_binlogs WHERE name = '{db}2'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + # Catch up + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + + replication.drop_dbs_ch() + replication.create_db_ch( + f"{db}1", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10", + ) + replication.create_db_ch( + f"{db}2", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10", + ) + replication.insert_data(db, "t", 10000, column="score") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}1 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}2 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 10000, column="score") + + num_rows = replication.inserted_rows + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() FROM system.mysql_binlogs WHERE name = '{db}1'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() FROM system.mysql_binlogs WHERE name = '{db}2'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + + replication.create_db_ch( + f"{db}3", + from_mysql_db=db, + settings="use_binlog_client=1", + ) + + mysql_node.query(f"UPDATE {db}.t SET score = score + 1") + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}1'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}2'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}3'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* 
expect: {num_rows} */ SELECT count() FROM {db}3.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + column = "bit_xor(cast(crc32(concat(toString(assumeNotNull(id)), toString(assumeNotNull(score)), toString(assumeNotNull(create_time)))) AS UInt32)) AS checksum" + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}1.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}2.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}3.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}1") + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM system.mysql_binlogs WHERE name = '{db}1'", + "0\n", + interval_seconds=1, + retry_count=10, + ) + + +def binlog_client_timeout_test(clickhouse_node, mysql_node, replication): + db = "binlog_client_timeout_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 10000, column="score") + num_rows = replication.inserted_rows + + replication.create_db_ch( + f"{db}1", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=100000000, max_milliseconds_to_wait_in_binlog_queue=60000", + ) + replication.create_db_ch( + f"{db}2", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10", + ) + replication.create_db_ch( + f"{db}3", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=100", + ) + replication.create_db_ch( + f"{db}4", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=10", + ) + + # After incremental sync + check_query( + clickhouse_node, + f"/* expect: 100000000, 60000 */ SELECT max_bytes, max_waiting_ms FROM system.mysql_binlogs WHERE name = '{db}1'", + f"100000000\t60000\n", + interval_seconds=1, + retry_count=10, + ) + check_query( + clickhouse_node, + f"/* expect: 10 */ SELECT max_bytes FROM system.mysql_binlogs WHERE name = '{db}2'", + f"10\n", + interval_seconds=2, + retry_count=10, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}3.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}4.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + clickhouse_node.query(f"DROP DATABASE {db}3") + replication.create_db_ch( + f"{db}3", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=10", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}3 FORMAT TSV", + "t\n", + ) 
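The MaterializedMySQL tests in this file repeatedly verify replication by computing the same per-table checksum on the MySQL source and on the ClickHouse replica and comparing the two; a condensed sketch of that pattern, reusing the helpers and the binlog_client_test database defined above:

# Checksum the rows on the MySQL side, then poll until the replica reports the same value.
mysql_crc32 = mysql_node.query_and_get_data(
    "SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum"
    " FROM binlog_client_test.t"
)[0][0]
ch_checksum_expr = (
    "bit_xor(cast(crc32(concat(toString(assumeNotNull(id)), toString(assumeNotNull(score)),"
    " toString(assumeNotNull(create_time)))) AS UInt32)) AS checksum"
)
check_query(
    clickhouse_node,
    f"SELECT {ch_checksum_expr} FROM binlog_client_test1.t",
    f"{mysql_crc32}\n",
    interval_seconds=1,
    retry_count=30,
)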
+ + clickhouse_node.query(f"DROP DATABASE {db}4") + replication.create_db_ch( + f"{db}4", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=50", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}4 FORMAT TSV", + "t\n", + ) + + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 1000 AND id < 100000" + ) + mysql_node.query(f"UPDATE {db}.t SET create_time='2021-11-11' WHERE score > 1000") + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-01-01'" + ) + + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}1.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}2.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}3.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}4.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}1'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}2'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}3'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}4'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}3.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}4.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + column = "bit_xor(cast(crc32(concat(toString(assumeNotNull(id)), toString(assumeNotNull(score)), toString(assumeNotNull(create_time)))) AS UInt32)) AS checksum" + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}1.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}2.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}3.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}4.t", + f"{mysql_crc32}\n", + 
interval_seconds=1, + retry_count=30, + ) + + +def wrong_password_test(clickhouse_node, mysql_node, replication): + db = "wrong_password_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 100, column="score") + with pytest.raises(Exception) as exc: + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{replication.mysql_host}:3306', '{db}', 'root', 'wrong_password') SETTINGS use_binlog_client=1" + ) + + replication.create_db_ch(db, settings="use_binlog_client=1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100, column="score") + check_query( + clickhouse_node, + f"/* expect: 200 */ SELECT COUNT() FROM {db}.t ", + "200\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: root@{replication.mysql_host}:3306 */ SELECT binlog_client_name FROM system.mysql_binlogs WHERE name = '{db}'", + f"root@{replication.mysql_host}:3306\n", + interval_seconds=1, + retry_count=30, + ) + + +def dispatcher_buffer_test(clickhouse_node, mysql_node, replication): + db = "dispatcher_buffer_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 100, column="score") + rows_count = 100 + replication.create_db_ch( + db, + settings="use_binlog_client=1, max_bytes_in_binlog_dispatcher_buffer=0, max_flush_milliseconds_in_binlog_dispatcher=0", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + rows_count += 100000 + + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 10000 AND id < 50000" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-01-01'" + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + column = "bit_xor(cast(crc32(concat(toString(assumeNotNull(id)), toString(assumeNotNull(score)), toString(assumeNotNull(create_time)))) AS UInt32)) AS checksum" + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {rows_count} */ SELECT COUNT() FROM {db}.t", + f"{rows_count}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + + clickhouse_node.query(f"DROP DATABASE {db}") + replication.create_db_ch( + f"{db}", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_dispatcher_buffer=1000, max_flush_milliseconds_in_binlog_dispatcher=1000", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 10000, column="score") + rows_count += 10000 + + mysql_node.query(f"UPDATE {db}.t SET create_time='2021-11-11' WHERE score > 10000") + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 10000 AND id < 50000" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE 
create_time='2021-01-01'" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-11-01'" + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {rows_count} */ SELECT COUNT() FROM {db}.t", + f"{rows_count}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}.t WHERE toDate(create_time)='2021-11-01'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + + replication.create_db_ch( + db, + settings="use_binlog_client=1, max_bytes_in_binlog_dispatcher_buffer=100000000, max_flush_milliseconds_in_binlog_dispatcher=1000", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + rows_count += 100000 + + mysql_node.query(f"UPDATE {db}.t SET create_time='2021-11-11' WHERE score > 10000") + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 10000 AND id < 50000" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-01-01'" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-11-01'" + ) + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM system.mysql_binlogs WHERE name = '{db}' AND (dispatcher_events_read_per_sec > 0 OR dispatcher_bytes_read_per_sec > 0 OR dispatcher_events_flush_per_sec > 0 OR dispatcher_bytes_flush_per_sec > 0)", + f"1\n", + interval_seconds=1, + retry_count=30, + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {rows_count} */ SELECT COUNT() FROM {db}.t", + f"{rows_count}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}.t WHERE toDate(create_time)='2021-11-01'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + + +def gtid_after_attach_test(clickhouse_node, mysql_node, replication): + db = "gtid_after_attach_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + + db_count = 6 + for i in range(db_count): + replication.create_db_ch( + f"{db}{i}", + from_mysql_db=db, + settings="use_binlog_client=1", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}0 FORMAT TSV", + "t\n", + ) + for i in range(int(db_count / 2)): + clickhouse_node.query(f"DETACH DATABASE {db}{i}") + + mysql_node.query(f"USE {db}") + rows = 10000 + for i in range(100): + mysql_node.query(f"ALTER TABLE t ADD COLUMN (e{i} INT)") + replication.insert_data(db, "t", rows, column="score") + + clickhouse_node.restart_clickhouse(stop_start_wait_sec=120) + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=300, + ) diff --git 
a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index f3369e25d94..727188a4b86 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -1,3 +1,4 @@ +import os import time import pymysql.cursors import pytest @@ -142,6 +143,145 @@ def clickhouse_node(): yield node_db +class ReplicationHelper: + def __init__(self, clickhouse, mysql, mysql_host=None): + self.clickhouse = clickhouse + self.mysql = mysql + self.created_mysql_dbs = [] + self.created_clickhouse_dbs = [] + self.base_mysql_settings = os.getenv("TEST_BASE_MYSQL_SETTINGS", "") + self.base_ch_settings = os.getenv("TEST_BASE_CH_SETTINGS", "") + self.mysql_host = mysql_host if mysql_host is not None else cluster.mysql8_host + self.created_insert_procedures = {} + self.inserted_rows_per_sp = {} + self.inserted_rows = 0 + + def create_dbs(self, db_name, ch_settings="", mysql_settings=""): + self.create_db_mysql(db_name, settings=mysql_settings) + self.create_db_ch(db_name, settings=ch_settings) + + def create_db_mysql(self, db_name, settings=""): + self.mysql.query(f"DROP DATABASE IF EXISTS {db_name}") + self.mysql.query( + f"CREATE DATABASE {db_name} {self.base_mysql_settings} {settings}" + ) + self.created_mysql_dbs.append(db_name) + + def create_db_ch( + self, db_name, from_mysql_db=None, settings="", table_overrides="" + ): + if from_mysql_db is None: + from_mysql_db = db_name + self.clickhouse.query(f"DROP DATABASE IF EXISTS {db_name}") + all_settings = "" + create_query = f"CREATE DATABASE {db_name} ENGINE = MaterializedMySQL('{self.mysql_host}:3306', '{from_mysql_db}', 'root', 'clickhouse')" + if self.base_ch_settings or settings: + separator = ", " if self.base_ch_settings and settings else "" + create_query += f" SETTINGS {self.base_ch_settings}{separator}{settings}" + if table_overrides: + create_query += f" {table_overrides}" + self.clickhouse.query(create_query) + self.created_clickhouse_dbs.append(db_name) + + def drop_dbs_mysql(self): + for db_name in self.created_mysql_dbs: + self.mysql.query(f"DROP DATABASE IF EXISTS {db_name}") + self.created_mysql_dbs = [] + self.created_insert_procedures = {} + self.inserted_rows_per_sp = {} + self.inserted_rows = 0 + + def drop_dbs_ch(self): + for db_name in self.created_clickhouse_dbs: + self.clickhouse.query(f"DROP DATABASE IF EXISTS {db_name}") + self.created_clickhouse_dbs = [] + + def drop_dbs(self): + self.drop_dbs_mysql() + self.drop_dbs_ch() + + def create_stored_procedure(self, db, table, column): + sp_id = f"{db}_{table}_{column}" + if sp_id in self.created_insert_procedures: + return sp_id + self.mysql.query(f"DROP PROCEDURE IF EXISTS {db}.insert_test_data_{sp_id}") + self.mysql.query( + f""" +CREATE PROCEDURE {db}.insert_test_data_{sp_id}(IN num_rows INT, IN existing_rows INT) +BEGIN + DECLARE i INT; + SET i = existing_rows; + SET @insert = concat("INSERT INTO {table} ({column}) VALUES "); + SET @exedata = ""; + WHILE i < (num_rows + existing_rows) DO + SET @exedata=concat(@exedata, ",(", i , ")"); + SET i = i + 1; + IF i % 1000 = 0 + THEN + SET @exedata = SUBSTRING(@exedata, 2); + SET @exesql = concat(@insert, @exedata); + PREPARE stmt FROM @exesql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + SET @exedata = ""; + END IF; + END WHILE; + IF length(@exedata) > 0 + THEN + SET @exedata = SUBSTRING(@exedata, 2); + SET @exesql = concat(@insert, @exedata); + PREPARE stmt FROM @exesql; + EXECUTE stmt; + DEALLOCATE PREPARE 
stmt; + END IF; +END""" + ) + self.created_insert_procedures[sp_id] = True + self.inserted_rows_per_sp[sp_id] = 0 + return sp_id + + def insert_data(self, db, table, num_rows, column="id"): + """Inserts num_rows into db.table, into the column `column` (which must be INT)""" + sp_id = self.create_stored_procedure(db, table, column) + self.mysql.query( + f"CALL {db}.insert_test_data_{sp_id}({num_rows}, {self.inserted_rows_per_sp[sp_id]})" + ) + self.inserted_rows_per_sp[sp_id] += num_rows + self.inserted_rows += num_rows + + def wait_for_sync_to_catch_up( + self, database: str = "", retry_count=30, interval_seconds=1 + ): + if database == "": + database = self.created_clickhouse_dbs[-1] + mysql_gtid = self.mysql.query_and_get_data("SELECT @@GLOBAL.gtid_executed")[0][ + 0 + ] + materialized_with_ddl.check_query( + self.clickhouse, + f"SELECT executed_gtid_set /* expect: {mysql_gtid} */ FROM system.materialized_mysql_databases WHERE name = '{database}'", + f"{mysql_gtid}\n", + retry_count=retry_count, + interval_seconds=interval_seconds, + ) + + +@pytest.fixture(scope="function") +def replication(started_mysql_8_0, request): + try: + replication = ReplicationHelper(node_db, started_mysql_8_0) + yield replication + finally: + if hasattr(request.session, "testsfailed") and request.session.testsfailed: + logging.warning(f"tests failed - not dropping databases") + else: + # drop databases only if the test succeeds - so we can inspect the database after failed tests + try: + replication.drop_dbs() + except Exception as e: + logging.warning(f"replication.drop_dbs() failed: {e}") + + def test_materialized_database_dml_with_mysql_5_7( started_cluster, started_mysql_5_7, clickhouse_node: ClickHouseInstance ): @@ -556,3 +696,21 @@ def test_table_with_indexes(started_cluster, started_mysql_8_0, clickhouse_node) materialized_with_ddl.table_with_indexes( clickhouse_node, started_mysql_8_0, "mysql80" ) + + +def test_binlog_client(started_cluster, started_mysql_8_0, replication): + materialized_with_ddl.binlog_client_test(node_db, started_mysql_8_0, replication) + replication.drop_dbs() + materialized_with_ddl.binlog_client_timeout_test( + node_db, started_mysql_8_0, replication + ) + replication.drop_dbs() + materialized_with_ddl.wrong_password_test(node_db, started_mysql_8_0, replication) + replication.drop_dbs() + materialized_with_ddl.dispatcher_buffer_test( + node_db, started_mysql_8_0, replication + ) + replication.drop_dbs() + materialized_with_ddl.gtid_after_attach_test( + node_db, started_mysql_8_0, replication + ) diff --git a/tests/integration/test_non_default_compression/configs/enable_zstdqat_codec.xml b/tests/integration/test_non_default_compression/configs/enable_zstdqat_codec.xml new file mode 100644 index 00000000000..c686b37a537 --- /dev/null +++ b/tests/integration/test_non_default_compression/configs/enable_zstdqat_codec.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index baac2661506..c646a678512 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -87,8 +87,3 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter node.contains_in_log("Processing query on a replica using custom_key") for node in nodes ) - else: - # we first transform all replicas into shards and then append for each shard filter - assert n1.contains_in_log( - "Single shard cluster 
used with custom_key, transforming replicas into virtual shards" - ) diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/__init__.py b/tests/integration/test_parallel_replicas_custom_key_failover/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml new file mode 100644 index 00000000000..da4e2517a44 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml @@ -0,0 +1,26 @@ + + + + + false + + n1 + 9000 + + + n2 + 1234 + + + n3 + 9000 + + + n4 + 1234 + + + + + + diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py new file mode 100644 index 00000000000..2b5aa2682d5 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -0,0 +1,128 @@ +import pytest +import uuid +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "n1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node3 = cluster.add_instance( + "n3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) + +nodes = [node1, node3] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(cluster, table_name): + node1.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + node3.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + + node1.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + ) + node3.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + ) + + # create distributed table + node1.query(f"DROP TABLE IF EXISTS {table_name}_d SYNC") + node1.query( + f""" + CREATE TABLE {table_name}_d AS {table_name} + Engine=Distributed( + {cluster}, + currentDatabase(), + {table_name}, + key + ) + """ + ) + + # populate data + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(2000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(3000, 1000)" + ) + node3.query(f"SYSTEM SYNC REPLICA {table_name}") + + +@pytest.mark.parametrize("use_hedged_requests", [1, 0]) +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize("prefer_localhost_replica", [0, 1]) +def test_parallel_replicas_custom_key_failover( + start_cluster, + use_hedged_requests, + custom_key, + filter_type, + prefer_localhost_replica, +): + cluster = "test_single_shard_multiple_replicas" + table = "test_table" + + create_tables(cluster, table) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t1000\n" + + log_comment = uuid.uuid4() + assert ( + node1.query( + f"SELECT key, count() FROM {table}_d GROUP BY key ORDER BY key", + settings={ + "log_comment": 
log_comment, + "prefer_localhost_replica": prefer_localhost_replica, + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "use_hedged_requests": use_hedged_requests, + # "async_socket_for_remote": 0, + # "async_query_sending_for_remote": 0, + }, + ) + == expected_result + ) + + for node in nodes: + node.query("system flush logs") + + # the subqueries should be spread over available nodes + query_id = node1.query( + f"SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '{log_comment}' AND type = 'QueryFinish' AND initial_query_id = query_id" + ) + assert query_id != "" + query_id = query_id[:-1] + + if prefer_localhost_replica == 0: + assert ( + node1.query( + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t3\nn3\t2\n" + ) diff --git a/tests/integration/test_remote_blobs_naming/configs/setting.xml b/tests/integration/test_remote_blobs_naming/configs/setting.xml new file mode 100644 index 00000000000..408fa36fdd3 --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/configs/setting.xml @@ -0,0 +1,11 @@ + + + + + + 1 + 1 + + + + diff --git a/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml b/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml index 31c6a3bf968..e901f0df51d 100644 --- a/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml +++ b/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml @@ -9,17 +9,17 @@ s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/old-style-prefix/with-several-section/ minio minio123 - s3_plain - http://minio1:9001/root/data/s3_pain_key_prefix - minio - minio123 - true - + s3_plain + http://minio1:9001/root/data/s3_pain_key_prefix + minio + minio123 + true + @@ -30,7 +30,6 @@ -
@@ -38,6 +37,13 @@
+ + +
+ s3 +
+
+
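The storage_conf.xml hunk above points the plain s3 disk at an explicit "old-style-prefix/with-several-section" endpoint, while storage_conf_new.xml (next) additionally defines a disk whose object keys are generated from a new-style template. A minimal illustrative sketch of the two naming schemes, not part of the change set: the prefix and template strings are copied from these configs, the helper functions are hypothetical.

    import re

    # Prefix used by the old-style s3 disk endpoint in storage_conf.xml.
    OLD_STYLE_PREFIX = "old-style-prefix/with-several-section"
    # Template-style pattern configured for the s3_template_key disk in storage_conf_new.xml.
    NEW_STYLE_TEMPLATE = r"[a-z]{3}-first-random-part/new-style-prefix/constant-part/[a-z]{3}/[a-z]{29}"

    def is_old_style(key: str) -> bool:
        # Old-style object keys simply live under the endpoint prefix.
        return key.startswith(OLD_STYLE_PREFIX + "/")

    def is_new_style(key: str) -> bool:
        # New-style object keys are generated so that they match the template regex.
        return re.fullmatch(NEW_STYLE_TEMPLATE, key) is not None

    assert is_old_style("old-style-prefix/with-several-section/some/part/blob")
    assert is_new_style("abc-first-random-part/new-style-prefix/constant-part/xyz/" + "a" * 29)
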
diff --git a/tests/integration/test_remote_blobs_naming/configs/storage_conf_new.xml b/tests/integration/test_remote_blobs_naming/configs/storage_conf_new.xml new file mode 100644 index 00000000000..c3b515e8777 --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/configs/storage_conf_new.xml @@ -0,0 +1,61 @@ + + + + + test + + + + + + s3 + http://minio1:9001/root/old-style-prefix/with-several-section/ + minio + minio123 + + + s3_plain + http://minio1:9001/root/data/s3_pain_key_prefix + minio + minio123 + true + + + s3 + http://minio1:9001/root/ + minio + minio123 + old-style-prefix/with-several-section + [a-z]{3}-first-random-part/new-style-prefix/constant-part/[a-z]{3}/[a-z]{29} + + + + + + +
+ s3 +
+
+
+ + +
+ s3_plain +
+
+
+ + +
+ s3_template_key +
+
+
+
+
+ + + s3 + +
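The backward-compatibility test that follows reads the zero-copy blob names from system.zookeeper and asserts that every name matches one of the two naming schemes, and that old-style and new-style names balance out. A standalone restatement of that final check, as a sketch: the regex and the counting logic are copied from test_backward_compatibility.py, the wrapper function itself is illustrative.

    import re

    # Same pattern the test matches each zero-copy blob name against.
    BLOB_RE = re.compile(
        r"(old-style-prefix_with-several-section"
        r"|[a-z]{3}-first-random-part_new-style-prefix_constant-part)"
        r"_[a-z]{3}_[a-z]{29}"
    )

    def blob_names_consistent(blob_names):
        # Every blob has to follow one of the two naming schemes.
        assert all(BLOB_RE.match(name) for name in blob_names)
        old = sum(1 for name in blob_names if "old-style-prefix" in name)
        new = sum(1 for name in blob_names if "new-style-prefix" in name)
        # Either nothing was written with the new template, or the old- and
        # new-style writers produced the same number of blobs.
        return (new > 0 and old == new) or (new == 0 and old == len(blob_names))

    assert blob_names_consistent(
        [
            "old-style-prefix_with-several-section_abc_" + "a" * 29,
            "abc-first-random-part_new-style-prefix_constant-part_xyz_" + "b" * 29,
        ]
    )
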
diff --git a/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py index 485bf73dad1..8c52b05dba2 100644 --- a/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py +++ b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 - +from contextlib import contextmanager +from difflib import unified_diff import logging +import re import pytest import os @@ -27,7 +29,7 @@ def cluster(): "new_node", main_configs=[ "configs/new_node.xml", - "configs/storage_conf.xml", + "configs/storage_conf_new.xml", ], user_configs=[ "configs/settings.xml", @@ -49,6 +51,7 @@ def cluster(): with_zookeeper=True, stay_alive=True, ) + logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -200,8 +203,32 @@ def test_write_new_format(cluster): assert remote == object_key -@pytest.mark.parametrize("storage_policy", ["s3", "s3_plain"]) -def test_replicated_merge_tree(cluster, storage_policy): +@contextmanager +def drop_table_scope(nodes, tables, create_statements): + try: + for node in nodes: + for statement in create_statements: + node.query(statement) + yield + finally: + for node in nodes: + for table in tables: + node.query(f"DROP TABLE IF EXISTS {table} SYNC") + + +@pytest.mark.parametrize( + "test_case", + [ + ("s3_plain", False), + ("s3", False), + ("s3", True), + ("s3_template_key", False), + ("s3_template_key", True), + ], +) +def test_replicated_merge_tree(cluster, test_case): + storage_policy, zero_copy = test_case + if storage_policy == "s3_plain": # MergeTree table doesn't work on s3_plain. Rename operation is not implemented return @@ -209,35 +236,172 @@ def test_replicated_merge_tree(cluster, storage_policy): node_old = cluster.instances["node"] node_new = cluster.instances["new_node"] + zk_table_path = f"/clickhouse/tables/test_replicated_merge_tree_{storage_policy}{'_zero_copy' if zero_copy else ''}" create_table_statement = f""" - CREATE TABLE test_replicated_merge_tree ( - id Int64, - val String - ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_replicated_merge_tree_{storage_policy}', '{{replica}}') - PARTITION BY id - ORDER BY (id, val) - SETTINGS - storage_policy='{storage_policy}' - """ + CREATE TABLE test_replicated_merge_tree ( + id Int64, + val String + ) ENGINE=ReplicatedMergeTree('{zk_table_path}', '{{replica}}') + PARTITION BY id + ORDER BY (id, val) + SETTINGS + storage_policy='{storage_policy}', + allow_remote_fs_zero_copy_replication='{1 if zero_copy else 0}' + """ - node_old.query(create_table_statement) - node_new.query(create_table_statement) + with drop_table_scope( + [node_old, node_new], ["test_replicated_merge_tree"], [create_table_statement] + ): + node_old.query("INSERT INTO test_replicated_merge_tree VALUES (0, 'a')") + node_new.query("INSERT INTO test_replicated_merge_tree VALUES (1, 'b')") - node_old.query("INSERT INTO test_replicated_merge_tree VALUES (0, 'a')") - node_new.query("INSERT INTO test_replicated_merge_tree VALUES (1, 'b')") + # node_old have to fetch metadata from node_new and vice versa + node_old.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") + node_new.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") - # node_old have to fetch metadata from node_new and vice versa - node_old.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") - node_new.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") + count_old = node_old.query( + "SELECT 
count() FROM test_replicated_merge_tree" + ).strip() + count_new = node_new.query( + "SELECT count() FROM test_replicated_merge_tree" + ).strip() - count_old = node_old.query("SELECT count() FROM test_replicated_merge_tree").strip() - count_new = node_new.query("SELECT count() FROM test_replicated_merge_tree").strip() + assert count_old == "2" + assert count_new == "2" - assert count_old == "2" - assert count_new == "2" + if not zero_copy: + return - node_old.query("DROP TABLE test_replicated_merge_tree SYNC") - node_new.query("DROP TABLE test_replicated_merge_tree SYNC") + def get_remote_pathes(node, table_name, only_remote_path=True): + uuid = node.query( + f""" + SELECT uuid + FROM system.tables + WHERE name = '{table_name}' + """ + ).strip() + assert uuid + return node.query( + f""" + SELECT {"remote_path" if only_remote_path else "*"} + FROM system.remote_data_paths + WHERE + local_path LIKE '%{uuid}%' + AND local_path NOT LIKE '%format_version.txt%' + ORDER BY ALL + """ + ).strip() + + remote_pathes_old = get_remote_pathes(node_old, "test_replicated_merge_tree") + remote_pathes_new = get_remote_pathes(node_new, "test_replicated_merge_tree") + + assert len(remote_pathes_old) > 0 + assert remote_pathes_old == remote_pathes_new, ( + str(unified_diff(remote_pathes_old, remote_pathes_new)) + + "\n\nold:\n" + + get_remote_pathes(node_old, "test_replicated_merge_tree", False) + + "\n\nnew:\n" + + get_remote_pathes(node_new, "test_replicated_merge_tree", False) + ) + + def count_lines_with(lines, pattern): + return sum([1 for x in lines if pattern in x]) + + remore_pathes_with_old_format = count_lines_with( + remote_pathes_old.split(), "old-style-prefix" + ) + remore_pathes_with_new_format = count_lines_with( + remote_pathes_old.split(), "new-style-prefix" + ) + + if storage_policy == "s3_template_key": + assert remore_pathes_with_old_format == remore_pathes_with_new_format + assert remore_pathes_with_old_format == len(remote_pathes_old.split()) / 2 + else: + assert remore_pathes_with_old_format == len(remote_pathes_old.split()) + assert remore_pathes_with_new_format == 0 + + parts = ( + node_old.query( + """ + SELECT name + FROM system.parts + WHERE + table = 'test_replicated_merge_tree' + AND active + ORDER BY ALL + """ + ) + .strip() + .split() + ) + table_shared_uuid = node_old.query( + f"SELECT value FROM system.zookeeper WHERE path='{zk_table_path}' and name='table_shared_id'" + ).strip() + + part_blobs = {} + blobs_replicas = {} + + for part in parts: + blobs = ( + node_old.query( + f""" + SELECT name + FROM system.zookeeper + WHERE path='/clickhouse/zero_copy/zero_copy_s3/{table_shared_uuid}/{part}' + ORDER BY ALL + """ + ) + .strip() + .split() + ) + + for blob in blobs: + replicas = ( + node_old.query( + f""" + SELECT name + FROM system.zookeeper + WHERE path='/clickhouse/zero_copy/zero_copy_s3/{table_shared_uuid}/{part}/{blob}' + ORDER BY ALL + """ + ) + .strip() + .split() + ) + assert blob not in blobs_replicas + blobs_replicas[blob] = replicas + + assert part not in part_blobs + part_blobs[part] = blobs + + assert len(parts) == 2, "parts: " + str(parts) + assert len(part_blobs.keys()) == len(parts), ( + "part_blobs: " + str(part_blobs) + "; parts: " + str(parts) + ) + assert len(blobs_replicas.keys()) == len(parts), ( + "blobs_replicas: " + str(blobs_replicas) + "; parts: " + str(parts) + ) + + for replicas in blobs_replicas.values(): + assert len(replicas) == 2, "blobs_replicas: " + str(blobs_replicas) + + for blob in blobs_replicas.keys(): + assert re.match( + 
"(old-style-prefix_with-several-section|[a-z]{3}-first-random-part_new-style-prefix_constant-part)_[a-z]{3}_[a-z]{29}", + blob, + ), "blobs_replicas: " + str(blobs_replicas) + + old_style_count = sum( + [1 for x in blobs_replicas.keys() if "old-style-prefix" in x] + ) + new_style_count = sum( + [1 for x in blobs_replicas.keys() if "new-style-prefix" in x] + ) + + assert (new_style_count > 0 and old_style_count == new_style_count) or ( + new_style_count == 0 and old_style_count == len(blobs_replicas) + ) def switch_config_write_full_object_key(node, enable): diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 1fc3fe37044..f6994b8d15e 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -338,6 +338,8 @@ def test_alter_drop_part(started_cluster, engine): main_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (123)") if engine == "MergeTree": dummy_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (456)") + else: + main_node.query(f"SYSTEM SYNC REPLICA {database}.alter_drop_part PULL") main_node.query(f"ALTER TABLE {database}.alter_drop_part DROP PART '{part_name}'") assert main_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" if engine == "ReplicatedMergeTree": diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 2176b0151ff..1c7814435db 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -892,12 +892,14 @@ def test_kafka_formats(kafka_cluster): """ expected_rows_count = raw_expected.count("\n") - instance.query_with_retry( + result_checker = lambda res: res.count("\n") == expected_rows_count + res = instance.query_with_retry( f"SELECT * FROM test.kafka_{list(all_formats.keys())[-1]}_mv;", retry_count=30, sleep_time=1, - check_callback=lambda res: res.count("\n") == expected_rows_count, + check_callback=result_checker, ) + assert result_checker(res) for format_name, format_opts in list(all_formats.items()): logging.debug(("Checking {}".format(format_name))) @@ -3808,12 +3810,14 @@ def test_kafka_formats_with_broken_message(kafka_cluster): """ expected_rows_count = raw_expected.count("\n") - instance.query_with_retry( + result_checker = lambda res: res.count("\n") == expected_rows_count + res = instance.query_with_retry( f"SELECT * FROM test.kafka_data_{list(all_formats.keys())[-1]}_mv;", retry_count=30, sleep_time=1, - check_callback=lambda res: res.count("\n") == expected_rows_count, + check_callback=result_checker, ) + assert result_checker(res) for format_name, format_opts in list(all_formats.items()): logging.debug(f"Checking {format_name}") @@ -4931,6 +4935,80 @@ def test_formats_errors(kafka_cluster): instance.query("DROP TABLE test.view") +def test_multiple_read_in_materialized_views(kafka_cluster, max_retries=15): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic = "multiple_read_from_mv" + kafka_create_topic(admin_client, topic) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka_multiple_read_input; + DROP TABLE IF EXISTS test.kafka_multiple_read_table; + DROP TABLE IF EXISTS test.kafka_multiple_read_mv; + + CREATE TABLE test.kafka_multiple_read_input (id Int64) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = 
'JSONEachRow'; + + CREATE TABLE test.kafka_multiple_read_table (id Int64) + ENGINE = MergeTree + ORDER BY id; + + + CREATE MATERIALIZED VIEW IF NOT EXISTS test.kafka_multiple_read_mv TO test.kafka_multiple_read_table AS + SELECT id + FROM test.kafka_multiple_read_input + WHERE id NOT IN ( + SELECT id + FROM test.kafka_multiple_read_table + WHERE id IN ( + SELECT id + FROM test.kafka_multiple_read_input + ) + ); + """ + ) + + kafka_produce( + kafka_cluster, topic, [json.dumps({"id": 42}), json.dumps({"id": 43})] + ) + + expected_result = "42\n43\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + retry_count=30, + sleep_time=0.5, + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result + + # Verify that the query deduplicates the records as it meant to be + messages = [] + for i in range(0, 10): + messages.append(json.dumps({"id": 42})) + messages.append(json.dumps({"id": 43})) + + messages.append(json.dumps({"id": 44})) + + kafka_produce(kafka_cluster, topic, messages) + + expected_result = "42\n43\n44\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + retry_count=30, + sleep_time=0.5, + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 3e3132949e7..e2257026dc7 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -859,6 +859,55 @@ def test_settings(started_cluster): conn.close() +def test_mysql_point(started_cluster): + table_name = "test_mysql_point" + node1.query(f"DROP TABLE IF EXISTS {table_name}") + + conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + drop_mysql_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + f""" + CREATE TABLE `clickhouse`.`{table_name}` ( + `id` int NOT NULL, + `point` Point NOT NULL, + PRIMARY KEY (`id`)) ENGINE=InnoDB; + """ + ) + cursor.execute( + f"INSERT INTO `clickhouse`.`{table_name}` SELECT 1, Point(15, 20)" + ) + assert 1 == cursor.execute(f"SELECT count(*) FROM `clickhouse`.`{table_name}`") + + conn.commit() + + result = node1.query( + f"DESCRIBE mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + assert result.strip() == "id\tInt32\t\t\t\t\t\npoint\tPoint" + + assert 1 == int( + node1.query( + f"SELECT count() FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + ) + assert ( + "(15,20)" + == node1.query( + f"SELECT point FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ).strip() + ) + + node1.query("DROP TABLE IF EXISTS test") + node1.query( + f"CREATE TABLE test (id Int32, point Point) Engine=MySQL('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + assert "(15,20)" == node1.query(f"SELECT point FROM test").strip() + + drop_mysql_table(conn, table_name) + conn.close() + + if __name__ == "__main__": with contextmanager(started_cluster)() as cluster: for name, instance in list(cluster.instances.items()): diff --git a/tests/integration/test_storage_s3_queue/configs/defaultS3.xml b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml deleted file mode 100644 index 7dac6d9fbb5..00000000000 --- a/tests/integration/test_storage_s3_queue/configs/defaultS3.xml +++ 
/dev/null @@ -1,11 +0,0 @@ - - - - http://resolver:8080 -
Authorization: Bearer TOKEN
-
- - http://resolver:8080/root-with-auth/restricteddirectory/ - -
-
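The s3_queue changes below drop the defaultS3/named-collection configs and instead parameterize table creation with either real credentials or anonymous NOSIGN access against a public bucket. A hedged sketch of the DDL shape that create_table() in the updated test.py builds: the auth lists and the S3Queue engine clause mirror the test, while the table names and URLs here are made up for illustration.

    DEFAULT_AUTH = ["'minio'", "'minio123'"]
    NO_AUTH = ["NOSIGN"]

    def s3queue_ddl(table, url, auth, file_format="CSV"):
        # Mirrors how create_table() joins the auth parameters into the engine clause.
        auth_params = ",".join(auth)
        return (
            f"CREATE TABLE {table} (column1 UInt32, column2 UInt32, column3 UInt32) "
            f"ENGINE = S3Queue('{url}', {auth_params}, {file_format})"
        )

    # Credential-based access to the regular MinIO bucket:
    print(s3queue_ddl("test.q_auth", "http://minio1:9001/root/q_auth_data/", DEFAULT_AUTH))
    # Anonymous access (NOSIGN) to the public bucket set up by prepare_public_s3_bucket():
    print(s3queue_ddl("test.q_nosign", "http://minio1:9001/root-public/q_nosign_data/", NO_AUTH))
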
diff --git a/tests/integration/test_storage_s3_queue/configs/named_collections.xml b/tests/integration/test_storage_s3_queue/configs/named_collections.xml deleted file mode 100644 index 64674e2a3e3..00000000000 --- a/tests/integration/test_storage_s3_queue/configs/named_collections.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - http://minio1:9001/root/test_table - minio - minio123 - - - http://minio1:9001/root/test_parquet - minio - minio123 - - - http://minio1:9001/root/test_parquet_gz - minio - minio123 - - - http://minio1:9001/root/test_orc - minio - minio123 - - - http://minio1:9001/root/test_native - minio - minio123 - - - http://minio1:9001/root/test.arrow - minio - minio123 - - - http://minio1:9001/root/test.parquet - minio - minio123 - - - http://minio1:9001/root/test_cache4.jsonl - true - - - diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index b83c095a7a6..7d40060fec6 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1,6 +1,5 @@ import io import logging -import os import random import time @@ -9,75 +8,57 @@ from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, ClickHouseInstance import json -""" -export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-server -export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-client -export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-odbc-bridge -export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/home/sergey/vkr/ClickHouse/programs/server -""" - -MINIO_INTERNAL_PORT = 9001 AVAILABLE_MODES = ["unordered", "ordered"] -AUTH = "'minio','minio123'," -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_AUTH = ["'minio'", "'minio123'"] +NO_AUTH = ["NOSIGN"] -def prepare_s3_bucket(started_cluster): - # Allows read-write access for bucket without authorization. 
- bucket_read_write_policy = { - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:GetBucketLocation", - "Resource": "arn:aws:s3:::root", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:ListBucket", - "Resource": "arn:aws:s3:::root", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:GetObject", - "Resource": "arn:aws:s3:::root/*", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:PutObject", - "Resource": "arn:aws:s3:::root/*", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:DeleteObject", - "Resource": "arn:aws:s3:::root/*", - }, - ], - } +def prepare_public_s3_bucket(started_cluster): + def create_bucket(client, bucket_name, policy): + if client.bucket_exists(bucket_name): + client.remove_bucket(bucket_name) + + client.make_bucket(bucket_name) + + client.set_bucket_policy(bucket_name, json.dumps(policy)) + + def get_policy_with_public_access(bucket_name): + return { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Principal": "*", + "Action": [ + "s3:GetBucketLocation", + "s3:ListBucket", + ], + "Resource": f"arn:aws:s3:::{bucket_name}", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": "*", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + ], + "Resource": f"arn:aws:s3:::{bucket_name}/*", + }, + ], + } minio_client = started_cluster.minio_client - minio_client.set_bucket_policy( - started_cluster.minio_bucket, json.dumps(bucket_read_write_policy) - ) - started_cluster.minio_restricted_bucket = "{}-with-auth".format( - started_cluster.minio_bucket + started_cluster.minio_public_bucket = f"{started_cluster.minio_bucket}-public" + create_bucket( + minio_client, + started_cluster.minio_public_bucket, + get_policy_with_public_access(started_cluster.minio_public_bucket), ) - if minio_client.bucket_exists(started_cluster.minio_restricted_bucket): - minio_client.remove_bucket(started_cluster.minio_restricted_bucket) - - minio_client.make_bucket(started_cluster.minio_restricted_bucket) @pytest.fixture(autouse=True) @@ -89,11 +70,9 @@ def s3_queue_setup_teardown(started_cluster): instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") minio = started_cluster.minio_client - objects = list( - minio.list_objects(started_cluster.minio_restricted_bucket, recursive=True) - ) + objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) for obj in objects: - minio.remove_object(started_cluster.minio_restricted_bucket, obj.object_name) + minio.remove_object(started_cluster.minio_bucket, obj.object_name) yield # run test @@ -107,8 +86,6 @@ def started_cluster(): with_minio=True, with_zookeeper=True, main_configs=[ - "configs/defaultS3.xml", - "configs/named_collections.xml", "configs/zookeeper.xml", "configs/s3queue_log.xml", ], @@ -119,8 +96,6 @@ def started_cluster(): with_minio=True, with_zookeeper=True, main_configs=[ - "configs/defaultS3.xml", - "configs/named_collections.xml", "configs/s3queue_log.xml", ], ) @@ -129,7 +104,6 @@ def started_cluster(): cluster.start() logging.info("Cluster started") - prepare_s3_bucket(cluster) yield cluster finally: cluster.shutdown() @@ -146,7 +120,13 @@ def run_query(instance, query, stdin=None, settings=None): def generate_random_files( - started_cluster, files_path, count, column_num=3, row_num=10, start_ind=0 + started_cluster, + 
files_path, + count, + column_num=3, + row_num=10, + start_ind=0, + bucket=None, ): files = [ (f"{files_path}/test_{i}.csv", i) for i in range(start_ind, start_ind + count) @@ -164,28 +144,14 @@ def generate_random_files( values_csv = ( "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" ).encode() - put_s3_file_content(started_cluster, filename, values_csv) + put_s3_file_content(started_cluster, filename, values_csv, bucket) return total_values -def put_s3_file_content(started_cluster, filename, data): +def put_s3_file_content(started_cluster, filename, data, bucket=None): + bucket = started_cluster.minio_bucket if bucket is None else bucket buf = io.BytesIO(data) - started_cluster.minio_client.put_object( - started_cluster.minio_bucket, filename, buf, len(data) - ) - - -def get_s3_file_content(started_cluster, bucket, filename, decode=True): - # type: (ClickHouseCluster, str, str, bool) -> str - # Returns content of given S3 file as string. - - data = started_cluster.minio_client.get_object(bucket, filename) - data_str = b"" - for chunk in data.stream(): - data_str += chunk - if decode: - return data_str.decode() - return data_str + started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) def create_table( @@ -197,7 +163,12 @@ def create_table( format="column1 UInt32, column2 UInt32, column3 UInt32", additional_settings={}, file_format="CSV", + auth=DEFAULT_AUTH, + bucket=None, ): + auth_params = ",".join(auth) + bucket = started_cluster.minio_bucket if bucket is None else bucket + settings = { "s3queue_loading_retries": 0, "after_processing": "keep", @@ -206,11 +177,11 @@ def create_table( } settings.update(additional_settings) - url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{started_cluster.minio_bucket}/{files_path}/" + url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" node.query(f"DROP TABLE IF EXISTS {table_name}") create_query = f""" CREATE TABLE {table_name} ({format}) - ENGINE = S3Queue('{url}', {AUTH}'{file_format}') + ENGINE = S3Queue('{url}', {auth_params}, {file_format}) SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))} """ node.query(create_query) @@ -922,3 +893,70 @@ def test_drop_table(started_cluster): ) or node.contains_in_log( f"StorageS3Queue ({table_name}): Shutdown was called, stopping sync" ) + + +def test_s3_client_reused(started_cluster): + node = started_cluster.instances["instance"] + table_name = f"test.test_s3_client_reused" + dst_table_name = f"{table_name}_dst" + files_path = f"{table_name}_data" + row_num = 10 + + def get_created_s3_clients_count(): + value = node.query( + f"SELECT value FROM system.events WHERE event='S3Clients'" + ).strip() + return int(value) if value != "" else 0 + + def wait_all_processed(files_num): + expected_count = files_num * row_num + for _ in range(100): + count = int(node.query(f"SELECT count() FROM {dst_table_name}")) + print(f"{count}/{expected_count}") + if count == expected_count: + break + time.sleep(1) + assert ( + int(node.query(f"SELECT count() FROM {dst_table_name}")) == expected_count + ) + + prepare_public_s3_bucket(started_cluster) + + s3_clients_before = get_created_s3_clients_count() + + create_table( + started_cluster, + node, + table_name, + "ordered", + files_path, + additional_settings={ + "after_processing": "delete", + "s3queue_processing_threads_num": 1, + }, + auth=NO_AUTH, + bucket=started_cluster.minio_public_bucket, + ) + + s3_clients_after = get_created_s3_clients_count() + assert 
s3_clients_before + 1 == s3_clients_after + + create_mv(node, table_name, dst_table_name) + + for i in range(0, 10): + s3_clients_before = get_created_s3_clients_count() + + generate_random_files( + started_cluster, + files_path, + count=1, + start_ind=i, + row_num=row_num, + bucket=started_cluster.minio_public_bucket, + ) + + wait_all_processed(i + 1) + + s3_clients_after = get_created_s3_clients_count() + + assert s3_clients_before == s3_clients_after diff --git a/tests/performance/array_element.xml b/tests/performance/array_element.xml index 1f82b833380..c3641f426f3 100644 --- a/tests/performance/array_element.xml +++ b/tests/performance/array_element.xml @@ -2,4 +2,20 @@ SELECT count() FROM numbers(100000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], []][number % 2 + 2]) + + + select materialize(array(array(1,2,3,4)))[1] from numbers(10000000) format Null + select materialize(array(array(1,2,3,4)))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(array('hello', 'world')))[1] from numbers(10000000) format Null + select materialize(array(array('hello', 'world')))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(map('hello', 1, 'world', 2)))[1] from numbers(10000000) format Null + select materialize(array(map('hello', 1, 'world', 2)))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(1.23::Decimal256(2), 4.56::Decimal256(2)))[1] from numbers(10000000) format Null + select materialize(array(1.23::Decimal256(2), 4.56::Decimal256(2)))[materialize(1)] from numbers(10000000) format Null diff --git a/tests/performance/final_big_column.xml b/tests/performance/final_big_column.xml new file mode 100644 index 00000000000..1fd586d2d90 --- /dev/null +++ b/tests/performance/final_big_column.xml @@ -0,0 +1,21 @@ + + + 1 + 20G + + + + CREATE TABLE optimized_select_final (d Date, key UInt64, value String) + ENGINE = ReplacingMergeTree() + PARTITION BY toYYYYMM(d) ORDER BY key + + + INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(5000000) + INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(5000000) + + SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8 + SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null + + DROP TABLE IF EXISTS optimized_select_final + + diff --git a/tests/performance/multiif.xml b/tests/performance/multiif.xml new file mode 100644 index 00000000000..ad56ab3f5f2 --- /dev/null +++ b/tests/performance/multiif.xml @@ -0,0 +1,8 @@ + + CREATE TABLE test_multiif_t(d Nullable(Int64)) ENGINE Memory + INSERT INTO test_multiif_t SELECT * from numbers(300000000) + + select count(1) from test_multiif_t where multiIf(d > 2, d-2, d > 1, d-1, d >0, d, 0) > 1 SETTINGS max_threads=1 + + DROP TABLE IF EXISTS test_multiif_t + diff --git a/tests/performance/parallel_final.xml b/tests/performance/parallel_final.xml index d7ea0240105..97261f93983 100644 --- a/tests/performance/parallel_final.xml +++ b/tests/performance/parallel_final.xml @@ -9,51 +9,53 @@ - collapsing + replacing - collapsing_final_16p_ord - collapsing_final_16p_rnd - collapsing_final_16p_int_keys_ord - collapsing_final_16p_int_keys_rnd - collapsing_final_16p_str_keys_ord - collapsing_final_16p_str_keys_rnd - 
collapsing_final_1024p_ord - collapsing_final_1024p_rnd - collapsing_final_1p_ord + replacing_final_16p_ord + replacing_final_16p_rnd + replacing_final_16p_int_keys_ord + replacing_final_16p_int_keys_rnd + replacing_final_16p_str_keys_ord + replacing_final_16p_str_keys_rnd + replacing_final_1024p_ord + replacing_final_1024p_rnd + replacing_final_1p_ord - create table collapsing_final_16p_ord (key1 UInt32, key2 String, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2) partition by intDiv(key1, 8192 * 64) - create table collapsing_final_16p_rnd (key1 UInt32, key2 String, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2) partition by key1 % 16 - create table collapsing_final_16p_int_keys_ord (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by intDiv(key1, 8192 * 64) - create table collapsing_final_16p_int_keys_rnd (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 - create table collapsing_final_16p_str_keys_ord (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by intDiv(key1, 8192 * 64) - create table collapsing_final_16p_str_keys_rnd (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 - create table collapsing_final_1024p_ord (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by intDiv(key1, 8192 * 2) - create table collapsing_final_1024p_rnd (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 1024 - create table collapsing_final_1p_ord (key1 UInt64, key2 UInt64, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2) + create table replacing_final_16p_ord (key1 UInt32, key2 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2) partition by intDiv(key1, 8192 * 64) + create table replacing_final_16p_rnd (key1 UInt32, key2 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2) partition by key1 % 16 + create table replacing_final_16p_int_keys_ord (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by intDiv(key1, 8192 * 64) + create table replacing_final_16p_int_keys_rnd (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 + create table replacing_final_16p_str_keys_ord (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) 
partition by intDiv(key1, 8192 * 64) + create table replacing_final_16p_str_keys_rnd (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 + create table replacing_final_1024p_ord (key1 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1) partition by intDiv(key1, 8192 * 2) + create table replacing_final_1024p_rnd (key1 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1) partition by key1 % 1024 + create table replacing_final_1p_ord (key1 UInt64, key2 UInt64, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2) - insert into collapsing_final_16p_ord select number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_rnd select sipHash64(number), number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_int_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_int_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_str_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_str_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_ord select number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_rnd select sipHash64(number), number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_int_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_int_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_str_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_str_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_1024p_ord select number, 1, number from numbers_mt(16777216) - insert into collapsing_final_1024p_rnd select number, 1, number from numbers_mt(16777216) + insert into replacing_final_1024p_ord select number, 1, number from numbers_mt(16777216) + insert into replacing_final_1024p_rnd select number, 1, number from numbers_mt(16777216) - insert into collapsing_final_1p_ord select number, number + 1, 1, number from numbers_mt(5e7) + insert into replacing_final_1p_ord select number, number + 1, 1, number from numbers_mt(5e7) - optimize table {collapsing} final + optimize table {replacing} final - SELECT count() FROM {collapsing} final - SELECT sum(s) FROM {collapsing} final group by key1 limit 10 - SELECT sum(s) FROM {collapsing} final group by key1 % 8192 limit 10 + SELECT count() FROM {replacing} final + SELECT sum(s) FROM {replacing} final group by key1 limit 10 + SELECT sum(s) FROM {replacing} final group by key1 % 8192 limit 10 - DROP TABLE IF EXISTS {collapsing} + DROP TABLE IF EXISTS {replacing} + + DROP TABLE IF EXISTS {replacing} diff --git 
a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 6d1a271355e..7dc522dca7a 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -1223,7 +1223,7 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') select test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name, 'slower' test_status, - test_desc_.2 test_duration_ms, + test_desc_.2*1e3 test_duration_ms, 'https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.' || test || '.' || toString(query_index) report_url from queries array join map('old', left, 'new', right) as test_desc_ @@ -1232,7 +1232,7 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') select test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name, 'unstable' test_status, - test_desc_.2 test_duration_ms, + test_desc_.2*1e3 test_duration_ms, 'https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.' || test || '.' || toString(query_index) report_url from queries array join map('old', left, 'new', right) as test_desc_ diff --git a/tests/performance/scripts/download.sh b/tests/performance/scripts/download.sh index cb243b655c6..7a740a38fd2 100755 --- a/tests/performance/scripts/download.sh +++ b/tests/performance/scripts/download.sh @@ -67,8 +67,8 @@ function download mkdir ~/fg ||: ( cd ~/fg - wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" - wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/difffolded.pl" + wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/cd9ee4c4449775a2f867acf31c84b7fe4b132ad5/flamegraph.pl" + wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/cd9ee4c4449775a2f867acf31c84b7fe4b132ad5/difffolded.pl" chmod +x ~/fg/difffolded.pl chmod +x ~/fg/flamegraph.pl ) & diff --git a/tests/performance/scripts/entrypoint.sh b/tests/performance/scripts/entrypoint.sh index 95ffe44b654..ec7e4d96dde 100755 --- a/tests/performance/scripts/entrypoint.sh +++ b/tests/performance/scripts/entrypoint.sh @@ -14,7 +14,8 @@ SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" function curl_with_retry { for _ in 1 2 3 4 5 6 7 8 9 10; do - if curl --fail --head "$1";then + if curl --fail --head "$1" + then return 0 else sleep 1 diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference new file mode 100644 index 00000000000..31a4360469f --- /dev/null +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference @@ -0,0 +1,6 @@ +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(UInt16, UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) +2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} 
(3,4) +3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) +2 +10001 diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.sql b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.sql new file mode 100644 index 00000000000..92748efd2d1 --- /dev/null +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.sql @@ -0,0 +1,50 @@ +--Tags: no-fasttest, no-cpu-aarch64, no-cpu-s390x +-- no-fasttest because ZSTD_QAT isn't available in fasttest +-- no-cpu-aarch64 and no-cpu-s390x because ZSTD_QAT is x86-only + +SET enable_zstd_qat_codec = 1; + +-- Suppress test failures because stderr contains warning "Initialization of hardware-assisted ZSTD_QAT codec failed, falling back to software ZSTD coded." +SET send_logs_level = 'fatal'; + +DROP TABLE IF EXISTS compression_codec; + +-- negative test +CREATE TABLE compression_codec(id UInt64 CODEC(ZSTD_QAT(0))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_CODEC_PARAMETER } +CREATE TABLE compression_codec(id UInt64 CODEC(ZSTD_QAT(13))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_CODEC_PARAMETER } + +CREATE TABLE compression_codec( + id UInt64 CODEC(ZSTD_QAT), + data String CODEC(ZSTD_QAT), + ddd Date CODEC(ZSTD_QAT), + ddd32 Date32 CODEC(ZSTD_QAT), + somenum Float64 CODEC(ZSTD_QAT), + somestr FixedString(3) CODEC(ZSTD_QAT), + othernum Int64 CODEC(ZSTD_QAT), + somearray Array(UInt8) CODEC(ZSTD_QAT), + somemap Map(String, UInt32) CODEC(ZSTD_QAT), + sometuple Tuple(UInt16, UInt64) CODEC(ZSTD_QAT), +) ENGINE = MergeTree() ORDER BY tuple(); + +SHOW CREATE TABLE compression_codec; + +INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5, [1,2,3], map('k1',1,'k2',2), tuple(1,2)); +INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6, [4,5,6], map('k3',3,'k4',4), tuple(3,4)); +INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6)); + +SELECT * FROM compression_codec ORDER BY id; + +OPTIMIZE TABLE compression_codec FINAL; + +INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), toDate32('2018-12-13'), 4.4, 'ddd', 8, [10,11,12], map('k7',7,'k8',8), tuple(7,8)); + +DETACH TABLE compression_codec; +ATTACH TABLE compression_codec; + +SELECT count(*) FROM compression_codec WHERE id = 2 GROUP BY id; + +INSERT INTO compression_codec SELECT 3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6) FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM compression_codec WHERE id = 3 GROUP BY id; + +DROP TABLE IF EXISTS compression_codec; diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 5264d51fa73..43b15ded93d 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -44,11 +44,14 @@ hello (-100,200,300) [-100,0,0] [-100,NULL,NULL] +[-100,NULL,NULL] [0,200,0] [NULL,200,NULL] +[NULL,200,NULL] -100 200 \N +\N 1 Thursday Friday @@ -209,11 +212,14 @@ hello (-100,200,300) [-100,0,0] [-100,NULL,NULL] +[-100,NULL,NULL] [0,200,0] [NULL,200,NULL] +[NULL,200,NULL] -100 200 \N +\N 1 Thursday Friday diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index 16cc72f7fdc..e19dd17670e 100644 --- 
a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -56,11 +56,14 @@ SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Float3 SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Tuple(Int8, Float32, UInt16)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Int8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(Int8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(UInt8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(UInt8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(UInt8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1, 'Int8'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2, 'Int32'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'LowCardinality(Nullable(Int64))'); SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8'); SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); @@ -241,11 +244,14 @@ SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Float3 SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Tuple(Int8, Float32, UInt16)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Int8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(Int8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(UInt8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(UInt8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(UInt8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1, 'Int8'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2, 'Int32'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'LowCardinality(Nullable(Int64))'); SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8'); SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 1a3a271528c..f9f5c2bd3df 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -150,6 +150,7 @@ SYSTEM THREAD FUZZER ['SYSTEM START THREAD 
FUZZER','SYSTEM STOP THREAD FUZZER',' SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM SYSTEM LISTEN ['SYSTEM START LISTEN','SYSTEM STOP LISTEN'] GLOBAL SYSTEM +SYSTEM JEMALLOC ['SYSTEM JEMALLOC PURGE','SYSTEM JEMALLOC ENABLE PROFILE','SYSTEM JEMALLOC DISABLE PROFILE','SYSTEM JEMALLOC FLUSH PROFILE'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL displaySecretsInShowAndSelect [] GLOBAL ALL diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.reference b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference index 64bb6b746dc..209e3ef4b62 100644 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.reference +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference @@ -1 +1 @@ -30 +20 diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh index f07ffc02e4f..9d9c6b920b6 100755 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable|Timeout: connect timed out' | wc -l +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --connect_timeout_with_failover_ms 1 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable|Timeout: connect timed out' | wc -l diff --git a/tests/queries/0_stateless/01451_normalize_query.reference b/tests/queries/0_stateless/01451_normalize_query.reference index 339ad34ea77..b331e139dc4 100644 --- a/tests/queries/0_stateless/01451_normalize_query.reference +++ b/tests/queries/0_stateless/01451_normalize_query.reference @@ -23,3 +23,23 @@ SELECT ?.. SELECT ? xyz11 SELECT ?, xyz11 SELECT ?.. +? - ? +?.. +-?.. +? - ?.. +f(-?..) +[-?..] +? + ? +?.. +-?.. +? + ?.. +f(+?..) +[+?..] +?.. + ? +?.. - ?.. +f(+?..), ? +[+?..] - ? +-?.. - [+?..] - ? +(+?..) - ? +-?.. - (+?..) - ? +(+?..) 
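The 01451_normalize_query reference above, together with the queries added just below, pins down how normalizeQuery now treats signed numeric literals: the sign is folded into the literal, so signed lists also collapse into the ?.. placeholder. A few input/output pairs taken directly from the test:

SELECT normalizeQuery('f(-2, 3)');        -- f(-?..)
SELECT normalizeQuery('[-1, 2, 3]');      -- [-?..]
SELECT normalizeQuery('(+1, 2, 3) - 1');  -- (+?..) - ?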
diff --git a/tests/queries/0_stateless/01451_normalize_query.sql b/tests/queries/0_stateless/01451_normalize_query.sql index 3c01a975712..14c74d95019 100644 --- a/tests/queries/0_stateless/01451_normalize_query.sql +++ b/tests/queries/0_stateless/01451_normalize_query.sql @@ -23,4 +23,23 @@ SELECT normalizeQuery('SELECT 1, ''xyz11'''); SELECT normalizeQuery('SELECT $doc$VALUE$doc$ xyz11'); SELECT normalizeQuery('SELECT $doc$VALUE$doc$, xyz11'); SELECT normalizeQuery('SELECT $doc$VALUE$doc$, ''xyz11'''); - +SELECT normalizeQuery('1 - 2'); +SELECT normalizeQuery('1, -2, 3'); +SELECT normalizeQuery('-1, -2, 3'); +SELECT normalizeQuery('1 - 2, 3, 4'); +SELECT normalizeQuery('f(-2, 3)'); +SELECT normalizeQuery('[-1, 2, 3]'); +SELECT normalizeQuery('1 + 2'); +SELECT normalizeQuery('1, +2, 3'); +SELECT normalizeQuery('-1, +2, 3'); +SELECT normalizeQuery('1 + 2, 3, 4'); +SELECT normalizeQuery('f(+2, 3)'); +SELECT normalizeQuery('[+1, 2, 3]'); +SELECT normalizeQuery('1, 2, 3 + 4'); +SELECT normalizeQuery('1, 2 - 3, 4'); +SELECT normalizeQuery('f(+2, 3), 1'); +SELECT normalizeQuery('[+1, 2, 3] - 1'); +SELECT normalizeQuery('-1, 1 - [+1, 2, 3] - 1'); +SELECT normalizeQuery('(+1, 2, 3) - 1'); +SELECT normalizeQuery('-1, 1 - (+1, 2, 3) - 1'); +SELECT normalizeQuery('(+1, 2, -3)'); diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql index 6e23ab9cdb9..c25f308eda8 100644 --- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql +++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + -- Check remerge_sort_lowered_memory_bytes_ratio setting set max_memory_usage='300Mi'; diff --git a/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.reference b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.reference new file mode 100644 index 00000000000..ee824fc9e0a --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.reference @@ -0,0 +1,3 @@ +81920 +81920 +81920 diff --git a/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.sql b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.sql new file mode 100644 index 00000000000..1286b0e74eb --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS aggregate_functions_null_for_empty; + +CREATE TABLE aggregate_functions_null_for_empty (`x` UInt32, `y` UInt64, PROJECTION p (SELECT sum(y))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO aggregate_functions_null_for_empty SELECT number, number * 2 FROM numbers(8192 * 10) SETTINGS aggregate_functions_null_for_empty = true; + +SELECT count() FROM aggregate_functions_null_for_empty; + +DROP TABLE aggregate_functions_null_for_empty; + +DROP TABLE IF EXISTS transform_null_in; + +CREATE TABLE transform_null_in (`x` UInt32, `y` UInt64, PROJECTION p (SELECT sum(y in (1,2,3)))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO transform_null_in SELECT number, number * 2 FROM numbers(8192 * 10) SETTINGS transform_null_in = true; + +SELECT count() FROM transform_null_in; + +DROP TABLE transform_null_in; + +DROP TABLE IF EXISTS legacy_column_name_of_tuple_literal; + +CREATE TABLE legacy_column_name_of_tuple_literal (`x` UInt32, `y` UInt64, PROJECTION p (SELECT sum(y in (1,2,3)))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO 
legacy_column_name_of_tuple_literal SELECT number, number * 2 FROM numbers(8192 * 10) SETTINGS legacy_column_name_of_tuple_literal = true; + +SELECT count() FROM legacy_column_name_of_tuple_literal; + +DROP TABLE legacy_column_name_of_tuple_literal; diff --git a/tests/queries/0_stateless/01753_fix_clickhouse_format.reference b/tests/queries/0_stateless/01753_fix_clickhouse_format.reference index 0aad4d64c55..735d4099534 100644 --- a/tests/queries/0_stateless/01753_fix_clickhouse_format.reference +++ b/tests/queries/0_stateless/01753_fix_clickhouse_format.reference @@ -1,5 +1,4 @@ -SELECT 1 -; +SELECT 1; SELECT 1 UNION ALL @@ -10,8 +9,7 @@ UNION ALL ) ; -SELECT 1 -; +SELECT 1; SELECT 1 UNION ALL @@ -22,4 +20,6 @@ UNION ALL ) ; +INSERT INTO t VALUES (1); + OK diff --git a/tests/queries/0_stateless/01753_fix_clickhouse_format.sh b/tests/queries/0_stateless/01753_fix_clickhouse_format.sh index 5cdd53b2166..ba7fe949833 100755 --- a/tests/queries/0_stateless/01753_fix_clickhouse_format.sh +++ b/tests/queries/0_stateless/01753_fix_clickhouse_format.sh @@ -8,4 +8,6 @@ echo "select 1; select 1 union all (select 1 union distinct select 1); " | $CL echo "select 1; select 1 union all (select 1 union distinct select 1); -- comment " | $CLICKHOUSE_FORMAT -n; -echo "insert into t values (1); " | $CLICKHOUSE_FORMAT -n 2>&1 \ | grep -F -q "Code: 578" && echo 'OK' || echo 'FAIL' +echo "insert into t values (1); " | $CLICKHOUSE_FORMAT -n + +echo 'insert into t format JSONEachRow {"a":1};' | $CLICKHOUSE_FORMAT -n 2>&1 \ | grep -F -q "NOT_IMPLEMENTED" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/01861_explain_pipeline.sql b/tests/queries/0_stateless/01861_explain_pipeline.sql index 93c82b6e265..99ea52ebfa4 100644 --- a/tests/queries/0_stateless/01861_explain_pipeline.sql +++ b/tests/queries/0_stateless/01861_explain_pipeline.sql @@ -3,8 +3,8 @@ CREATE TABLE test(a Int, b Int) Engine=ReplacingMergeTree order by a SETTINGS in INSERT INTO test select number, number from numbers(5); INSERT INTO test select number, number from numbers(5,2); set max_threads =1; -explain pipeline select * from test final; +explain pipeline select * from test final SETTINGS enable_vertical_final = 0; select * from test final; set max_threads =2; -explain pipeline select * from test final; +explain pipeline select * from test final SETTINGS enable_vertical_final = 0; DROP TABLE test; diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.reference b/tests/queries/0_stateless/01915_json_extract_raw_string.reference index e88c7e018d2..99ebfdcf5bd 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.reference +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.reference @@ -1,5 +1,6 @@ ('123','456','[7,8,9]') \N +\N 123 123 diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql index e81d527a3da..4b46db31559 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.sql +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -2,6 +2,7 @@ select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); with '{"string_value":null}' as json select JSONExtract(json, 'string_value', 'Nullable(String)'); +with '{"string_value":null}' as json select JSONExtract(json, 'string_value', 'LowCardinality(Nullable(String))'); select JSONExtractString('{"a": 123}', 'a'); select JSONExtractString('{"a": "123"}', 'a'); diff --git 
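The 00918_json_functions and 01915_json_extract_raw_string additions above extend JSONExtract to LowCardinality-wrapped result types. Condensed from the new test lines and their reference output:

WITH '{"string_value":null}' AS json SELECT JSONExtract(json, 'string_value', 'LowCardinality(Nullable(String))');  -- \N
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(Int8)))');         -- [-100,NULL,NULL]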
a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql index 83dd708c575..f344b7007d0 100644 --- a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql +++ b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql @@ -1,9 +1,9 @@ SET allow_experimental_analyzer = 1; -SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError 36 } +SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError INVALID_IDENTIFIER } -SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError 36 } +SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError INVALID_IDENTIFIER } -SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError 36 } +SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError INVALID_IDENTIFIER } SELECT 'still alive'; diff --git a/tests/queries/0_stateless/02013_json_function_null_column.reference b/tests/queries/0_stateless/02013_json_function_null_column.reference index ab702ab52cb..a8ffccc46bf 100644 --- a/tests/queries/0_stateless/02013_json_function_null_column.reference +++ b/tests/queries/0_stateless/02013_json_function_null_column.reference @@ -1,9 +1,11 @@ \N Nullable(String) +\N LowCardinality(Nullable(String)) String \N Nullable(String) Nullable(String) \N Nullable(Nothing) \N Nullable(Nothing) +\N Nullable(Nothing) b \N @@ -21,3 +23,6 @@ true a \N \N +('value') +(NULL) +(NULL) diff --git a/tests/queries/0_stateless/02013_json_function_null_column.sql b/tests/queries/0_stateless/02013_json_function_null_column.sql index 94a2320cefb..963d0ee55cc 100644 --- a/tests/queries/0_stateless/02013_json_function_null_column.sql +++ b/tests/queries/0_stateless/02013_json_function_null_column.sql @@ -1,9 +1,12 @@ SELECT JSONExtract('{"string_value":null}', 'string_value', 'Nullable(String)') as x, toTypeName(x); +SELECT JSONExtract('{"string_value":null}', 'string_value', 'LowCardinality(Nullable(String))') as x, toTypeName(x); SELECT JSONExtract('{"string_value":null}', 
'string_value', 'String') as x, toTypeName(x); SELECT JSONExtract(toNullable('{"string_value":null}'), 'string_value', 'Nullable(String)') as x, toTypeName(x); +SELECT JSONExtract(toNullable('{"string_value":null}'), 'string_value', 'LowCardinality(Nullable(String))') as x, toTypeName(x); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT JSONExtract(toNullable('{"string_value":null}'), 'string_value', 'String') as x, toTypeName(x); SELECT JSONExtract(NULL, 'string_value', 'Nullable(String)') as x, toTypeName(x); +SELECT JSONExtract(NULL, 'string_value', 'LowCardinality(Nullable(String))') as x, toTypeName(x); SELECT JSONExtract(NULL, 'string_value', 'String') as x, toTypeName(x); SELECT JSONExtractString('["a", "b", "c", "d", "e"]', idx) FROM (SELECT arrayJoin([2, NULL, 2147483646, 65535, 65535, 3]) AS idx); @@ -11,6 +14,7 @@ SELECT JSONExtractInt('[1]', toNullable(1)); SELECT JSONExtractBool('[1]', toNullable(1)); SELECT JSONExtractFloat('[1]', toNullable(1)); SELECT JSONExtractString('["a"]', toNullable(1)); +SELECT JSONExtractInt('[1]', toLowCardinality(toNullable(1))); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT JSONExtractArrayRaw('["1"]', toNullable(1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT JSONExtractKeysAndValuesRaw('["1"]', toNullable(1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -23,3 +27,7 @@ SELECT JSONExtract('[1]', toNullable(1), 'Nullable(Float)'); SELECT JSONExtract('["a"]', toNullable(1), 'Nullable(String)'); SELECT JSONExtract('["a"]', toNullable(1), 'Nullable(Int)'); SELECT JSONExtract('["-a"]', toNullable(1), 'Nullable(Int)'); + +SELECT JSONExtract(materialize('{"key":"value"}'), 'Tuple(key LowCardinality(Nullable(String)))'); +SELECT JSONExtract(materialize('{"key":null}'), 'Tuple(key LowCardinality(Nullable(String)))'); +SELECT JSONExtract(materialize('{"not_a_key":"value"}'), 'Tuple(key LowCardinality(Nullable(String)))'); diff --git a/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference b/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference index 030d5a8f5af..518ecfdb141 100644 --- a/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference +++ b/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference @@ -9,7 +9,7 @@ AlterCommand MODIFY_TTL (children 1) AlterCommand MATERIALIZE_TTL (children 1) AlterCommand MODIFY_SETTING (children 1) - AlterCommand RESET_SETTING + AlterCommand RESET_SETTING (children 1) AlterCommand MODIFY_QUERY (children 1) AlterCommand REMOVE_TTL AlterCommand REMOVE_SAMPLE_BY diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 91f0ecb8606..7e6d64b6b9f 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,14 +10,14 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.123456 -1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123457 +1980-12-12 12:12:12.123457 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 1930-12-12 12:12:12.123456 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.123457 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 diff --git a/tests/queries/0_stateless/02245_make_datetime64.reference b/tests/queries/0_stateless/02245_make_datetime64.reference index 0ac672ae54d..1c7d31788e3 100644 --- 
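The 02013_json_function_null_column changes a little above also cover two edge cases of the same feature: LowCardinality element types inside an extracted Tuple work, whereas a LowCardinality result type combined with a Nullable JSON argument is rejected. Condensed from the test:

SELECT JSONExtract(materialize('{"key":"value"}'), 'Tuple(key LowCardinality(Nullable(String)))');        -- ('value')
SELECT JSONExtract(materialize('{"not_a_key":"value"}'), 'Tuple(key LowCardinality(Nullable(String)))');  -- (NULL)
SELECT JSONExtract(toNullable('{"string_value":null}'), 'string_value', 'LowCardinality(Nullable(String))');  -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }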
a/tests/queries/0_stateless/02245_make_datetime64.reference +++ b/tests/queries/0_stateless/02245_make_datetime64.reference @@ -67,3 +67,4 @@ DateTime64(7, \'UTC\') 1900-01-01 00:00:00.000 1900-01-01 00:00:00.000 1900-01-01 00:00:00.000 +2024-01-08 11:12:13.014 diff --git a/tests/queries/0_stateless/02245_make_datetime64.sql b/tests/queries/0_stateless/02245_make_datetime64.sql index 62784cb9b75..71629ad8dff 100644 --- a/tests/queries/0_stateless/02245_make_datetime64.sql +++ b/tests/queries/0_stateless/02245_make_datetime64.sql @@ -82,6 +82,9 @@ select makeDateTime64(1991, 8, 24, 65537, 4, 0); select makeDateTime64(1991, 8, 24, 21, 65537, 0); select makeDateTime64(1991, 8, 24, 21, 4, 65537); +-- bug 58590 +select makeDateTime64(2024, 1, 8, 11, 12, 13, materialize(14)); + select makeDateTime64(year, 1, 1, 1, 0, 0, 0, precision, timezone) from ( select 1984 as year, 5 as precision, 'UTC' as timezone union all diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index e2d1ec3980e..2bba75f6788 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -1,7 +1,7 @@ [multi] insert into foo settings max_threads=1 Syntax error (query): failed at position 40 (end of query): [multi] insert into foo format tsv settings max_threads=1 -Can't format ASTInsertQuery with data, since data will be lost. +NOT_IMPLEMENTED [multi] insert into foo format tsv settings max_threads=1 INSERT INTO foo SETTINGS max_threads = 1 diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 8b156ffec83..49aa56d6c0a 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -25,7 +25,7 @@ function run_format_both() run_format 'insert into foo settings max_threads=1' |& grep --max-count 2 --only-matching -e "Syntax error (query): failed at position .* (end of query):" -e '^\[.*$' # compatibility -run_format 'insert into foo format tsv settings max_threads=1' |& grep --max-count 2 --only-matching -e "Can't format ASTInsertQuery with data, since data will be lost." 
-e '^\[.*$' +run_format 'insert into foo format tsv settings max_threads=1' |& grep --max-count 2 --only-matching -e "NOT_IMPLEMENTED" -e '^\[.*$' run_format_both 'insert into foo format tsv settings max_threads=1' --allow_settings_after_format_in_insert run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert |& grep --max-count 2 --only-matching -e "You have SETTINGS before and after FORMAT" -e '^\[.*$' diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh index 8f08bd6f84b..a08928a773c 100755 --- a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CURL_OUTPUT=$(echo 'SELECT 1 + sleepEachRow(0.00002) FROM numbers(100000)' | \ - ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) + ${CLICKHOUSE_CURL_COMMAND} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) READ_ROWS=$(echo "${CURL_OUTPUT}" | \ grep 'X-ClickHouse-Summary' | \ diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql index 21a1b643d98..bb12d1f251f 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -1,23 +1,23 @@ -- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ --- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, --- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, --- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, --- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, --- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, --- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, --- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, --- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, --- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, --- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, --- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, +-- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, +-- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, +-- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, +-- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, +-- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, -- dynamic(null), 'why', 'Professional', 'Partial College', 38 -- ] DROP TABLE IF EXISTS Customers; CREATE TABLE Customers -( +( FirstName Nullable(String), - LastName String, + LastName String, Occupation String, Education String, Age Nullable(UInt8) @@ -89,9 +89,9 @@ print '-- Summarize following sort --'; Customers | sort by FirstName | summarize count() by Occupation | sort by Occupation; print '-- summarize with bin --'; -EventLog | 
summarize count=count() by bin(Created, 1000); -EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s); -EventLog | summarize count=count() by time_label=bin(Created/1000, 1s); +EventLog | summarize count=count() by bin(Created, 1000) | sort by count asc; +EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s) | sort by count asc; +EventLog | summarize count=count() by time_label=bin(Created/1000, 1s) | sort by count asc; Dates | project bin(datetime(EventTime), 1m); print '-- make_list_with_nulls --'; Customers | summarize t = make_list_with_nulls(FirstName); diff --git a/tests/queries/0_stateless/02373_progress_contain_result.sh b/tests/queries/0_stateless/02373_progress_contain_result.sh index c87a5ec7615..fd343df1013 100755 --- a/tests/queries/0_stateless/02373_progress_contain_result.sh +++ b/tests/queries/0_stateless/02373_progress_contain_result.sh @@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh echo 'SELECT 1 FROM numbers(100)' | - ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0" --data-binary @- 2>&1 | + ${CLICKHOUSE_CURL_COMMAND} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0" --data-binary @- 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference index 676fb441f53..e08bc754a71 100644 --- a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference +++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference @@ -1,11 +1,3 @@ -storage String -source String -format String -additional_format_info String -registration_time DateTime -schema Nullable(String) -number_of_rows Nullable(UInt64) -schema_inference_mode Nullable(String) x Nullable(Int64) s Nullable(String) x Nullable(Int64) diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql index 310e22ed31f..64b6cd86fc7 100644 --- a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql +++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql @@ -4,7 +4,6 @@ set input_format_json_try_infer_numbers_from_strings=1; insert into function file('02374_data1.jsonl') select number as x, 'str' as s from numbers(10); insert into function file('02374_data2.jsonl') select number as x, 'str' as s from numbers(10); -desc system.schema_inference_cache; system drop schema cache for file; desc file('02374_data1.jsonl'); diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference index 5dd39c39852..365725f8ffe 100644 --- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference @@ -1,3 +1,10 @@ +-- { echoOn } + +SET join_algorithm = 'hash'; +EXPLAIN actions=0, description=0, header=1 +SELECT * FROM ( SELECT 'key2' AS key ) AS s1 +JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 +USING (key); Expression Header: key String value String @@ -21,6 +28,121 @@ Header: key String __table3.value String ReadFromStorage Header: dummy UInt8 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1; 
+1 1 1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1; +1 1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 0 0 0 +0 0 0 0 0 5 5 5 +0 0 0 0 0 6 6 6 +0 0 0 0 0 7 7 7 +0 0 0 0 0 8 8 8 +0 0 0 0 0 9 9 9 +0 0 0 0 0 10 10 10 +1 1 1 1 1 0 0 0 +2 2 2 2 2 0 0 0 +3 3 3 3 3 3 3 3 +4 4 4 4 4 4 4 4 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.k = t2.k +ORDER BY t1.k, t2.k +; +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1; +1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1; +1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 1 1 1 0 0 0 0 0 +1 2 2 2 0 0 0 0 0 +2 3 3 3 0 0 0 0 0 +3 4 4 4 4 4 4 4 4 +4 5 5 5 5 5 5 5 5 +5 0 0 0 6 6 6 6 6 +6 0 0 0 7 7 7 7 7 +7 0 0 0 8 8 8 8 8 +8 0 0 0 9 9 9 9 9 +9 0 0 0 10 10 10 10 10 +10 0 0 0 11 11 11 11 11 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (k) +ORDER BY k +; +1 0 0 0 0 0 0 0 0 +2 1 1 1 1 1 0 0 0 +3 2 2 2 2 2 0 0 0 +4 3 3 3 3 3 3 3 3 +5 4 4 4 4 4 4 4 4 +6 0 0 0 0 0 5 5 5 +7 0 0 0 0 0 6 6 6 +8 0 0 0 0 0 7 7 7 +9 0 0 0 0 0 8 8 8 +10 0 0 0 0 0 9 9 9 +11 0 0 0 0 0 10 10 10 +SET join_algorithm = 'full_sorting_merge', max_rows_in_set_to_optimize_join = 0; +EXPLAIN actions=0, description=0, header=1 +SELECT * FROM ( SELECT 'key2' AS key ) AS s1 +JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 +USING (key); Expression Header: key String value String @@ -50,3 +172,113 @@ Header: key String 
__table3.value String ReadFromStorage Header: dummy UInt8 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1; +1 1 1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1; +1 1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 0 0 0 +0 0 0 0 0 5 5 5 +0 0 0 0 0 6 6 6 +0 0 0 0 0 7 7 7 +0 0 0 0 0 8 8 8 +0 0 0 0 0 9 9 9 +0 0 0 0 0 10 10 10 +1 1 1 1 1 0 0 0 +2 2 2 2 2 0 0 0 +3 3 3 3 3 3 3 3 +4 4 4 4 4 4 4 4 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.k = t2.k +ORDER BY t1.k, t2.k +; +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1; +1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1; +1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 1 1 1 0 0 0 0 0 +1 2 2 2 0 0 0 0 0 +2 3 3 3 0 0 0 0 0 +3 4 4 4 4 4 4 4 4 +4 5 5 5 5 5 5 5 5 +5 0 0 0 6 6 6 6 6 +6 0 0 0 7 7 7 7 7 +7 0 0 0 8 8 8 8 8 +8 0 0 0 9 9 9 9 9 +9 0 0 0 10 10 10 10 10 +10 0 0 0 11 11 11 11 11 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (k) +ORDER BY k +; +1 0 0 0 0 0 0 0 0 +2 1 1 1 1 1 0 0 0 +3 2 2 2 2 2 0 0 0 +4 3 3 3 3 3 3 3 3 +5 4 4 4 4 4 4 4 4 +6 0 0 0 0 0 5 5 5 +7 0 0 0 0 0 6 6 6 +8 0 0 0 0 0 7 7 7 +9 0 0 0 0 0 8 8 8 +10 0 0 0 0 0 9 9 9 +11 0 0 0 0 0 10 10 10 diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql deleted file mode 100644 index dfcd8c12e11..00000000000 --- 
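The regenerated 02381_join_dup_columns_in_plan reference above checks that duplicate column names on either side of a FULL JOIN produce consistent results under both join algorithms. The smallest pattern the test repeats, with its expected output:

SET join_algorithm = 'hash';  -- the j2 template below re-runs everything with 'full_sorting_merge' and max_rows_in_set_to_optimize_join = 0
SELECT * FROM ( SELECT 1 AS k ) AS t1 FULL JOIN ( SELECT 1 AS k, k ) AS t2 USING (k) ORDER BY 1;  -- returns a single row: 1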
a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql +++ /dev/null @@ -1,16 +0,0 @@ -SET allow_experimental_analyzer = 1; -SET join_algorithm = 'hash'; - -EXPLAIN actions=0, description=0, header=1 -SELECT * FROM ( SELECT 'key2' AS key ) AS s1 -JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 -USING (key); - -SET join_algorithm = 'full_sorting_merge'; - -SET max_rows_in_set_to_optimize_join = 0; - -EXPLAIN actions=0, description=0, header=1 -SELECT * FROM ( SELECT 'key2' AS key ) AS s1 -JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 -USING (key); diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql.j2 b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql.j2 new file mode 100644 index 00000000000..ca4af4df6b6 --- /dev/null +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql.j2 @@ -0,0 +1,62 @@ +SET allow_experimental_analyzer = 1; + +-- { echoOn } + +{% for query_settings in ['join_algorithm = \'hash\'', 'join_algorithm = \'full_sorting_merge\', max_rows_in_set_to_optimize_join = 0'] -%} + +SET {{ query_settings }}; + +EXPLAIN actions=0, description=0, header=1 +SELECT * FROM ( SELECT 'key2' AS key ) AS s1 +JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 +USING (key); + +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1; +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1; + +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.k = t2.k +ORDER BY t1.k, t2.k +; + +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1; +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1; + +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (k) +ORDER BY k +; + +{% endfor -%} diff --git a/tests/queries/0_stateless/02494_query_cache_user_isolation.reference b/tests/queries/0_stateless/02494_query_cache_user_isolation.reference new file mode 100644 index 00000000000..f8c4b31b22a --- /dev/null +++ 
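The 02494_query_cache_user_isolation test introduced next drives two attack scenarios through clickhouse-client with throw-away users. The SQL surface it leans on is roughly the following sketch (use_query_cache and system.query_cache are existing features; the exact statements live in the shell script):

SELECT 1 SETTINGS use_query_cache = 1;    -- populate the cache as one user
SELECT count() FROM system.query_cache;   -- the test then checks which entries each user can see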
b/tests/queries/0_stateless/02494_query_cache_user_isolation.reference @@ -0,0 +1,28 @@ +Attack 1 +0 +system.query_cache with old user 1 +0 +0 1 +1 0 +system.query_cache with new user 0 +0 +0 1 +1 0 +0 1 +Attack 2 +-- policy_1 test +1 1 +3 1 +6 1 +-- policy_2 test +2 2 +5 2 +8 2 +-- policy_1 with query cache test +1 1 +3 1 +6 1 +-- policy_2 with query cache test +2 2 +5 2 +8 2 diff --git a/tests/queries/0_stateless/02494_query_cache_user_isolation.sh b/tests/queries/0_stateless/02494_query_cache_user_isolation.sh new file mode 100755 index 00000000000..d55e2460619 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_user_isolation.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest, long +# Tag no-parallel: Messes with internal cache +# no-fasttest: Produces wrong results in fasttest, unclear why, didn't reproduce locally. +# long: Sloooow ... + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# -- Attack 1: +# - create a user, +# - run a query whose result is stored in the query cache, +# - drop the user, recreate it with the same name +# - test that the cache entry is inaccessible + +echo "Attack 1" + +rnd=`tr -dc 1-9 (exponent, in) +-- INSERT INTO t_leading_zeroes_f VALUES (100, '00009e00009', 00009e00009, 9e9), (101, '-00009e00009', -00009e00009, -9e9), (102, '+00009e00009', +00009e00009, 9e9) + +SELECT 'Leading zeroes into Float32'; +SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes_f t ORDER BY id; + +DROP TABLE IF EXISTS t_leading_zeroes; +DROP TABLE IF EXISTS t_leading_zeroes_f; \ No newline at end of file diff --git a/tests/queries/0_stateless/02918_multif_for_nullable.reference b/tests/queries/0_stateless/02918_multif_for_nullable.reference new file mode 100644 index 00000000000..f58086cfee1 --- /dev/null +++ b/tests/queries/0_stateless/02918_multif_for_nullable.reference @@ -0,0 +1,5 @@ +-1 -1 -1 +1 -1 -1 +1 1 -1 +1 2 \N +1 3 \N diff --git a/tests/queries/0_stateless/02918_multif_for_nullable.sh b/tests/queries/0_stateless/02918_multif_for_nullable.sh new file mode 100755 index 00000000000..cd9ac8b904f --- /dev/null +++ b/tests/queries/0_stateless/02918_multif_for_nullable.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (d Nullable(Int64)) engine=Memory" +$CLICKHOUSE_CLIENT -q "insert into test_tbl select * from numbers(5)" +$CLICKHOUSE_CLIENT -q "select multiIf(d > 0, 1, -1), multiIf(d > 1, d-1, -1), multiIf(d > 2, null, -1) from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference new file mode 100644 index 00000000000..2d97dd0e12e --- /dev/null +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference @@ -0,0 +1,29 @@ +-- { echoOn } +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +0 250 +1 250 +2 250 +3 250 +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='range'; +0 250 +1 250 +2 250 +3 250 +SET use_hedged_requests=0; +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +0 250 +1 250 +2 250 +3 250 diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql new file mode 100644 index 00000000000..b9bc6974c47 --- /dev/null +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS 02918_parallel_replicas; + +CREATE TABLE 02918_parallel_replicas (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x); + +INSERT INTO 02918_parallel_replicas SELECT toString(number), number % 4 FROM numbers(1000); + +SET prefer_localhost_replica=0; + +-- { echoOn } +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; + +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='range'; + +SET use_hedged_requests=0; +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +-- { echoOff } + +DROP TABLE 02918_parallel_replicas; diff --git a/tests/queries/0_stateless/02932_idna.reference b/tests/queries/0_stateless/02932_idna.reference new file mode 100644 index 00000000000..0947194c07f --- /dev/null +++ 
b/tests/queries/0_stateless/02932_idna.reference @@ -0,0 +1,88 @@ +-- Negative tests +-- Regular cases +straße.de xn--strae-oqa.de xn--strae-oqa.de straße.de straße.de +2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 +AMAZON amazon amazon amazon amazon +aa-- aa-- aa-- aa-- aa-- +a†-- xn--a---kp0a xn--a---kp0a a†-- a†-- +ab--c ab--c ab--c ab--c ab--c +-† xn----xhn xn----xhn -† -† +-x.xn--zca -x.xn--zca -x.xn--zca -x.ß -x.ß +x-.xn--zca x-.xn--zca x-.xn--zca x-.ß x-.ß +x-.ß x-.xn--zca x-.xn--zca x-.ß x-.ß +x..ß x..xn--zca x..xn--zca x..ß x..ß +128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1 +xn--zca.xn--zca xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß +xn--zca.ß xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß +x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x +x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß +x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß +01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x +≠ xn--1ch xn--1ch ≠ ≠ +aa-- aa-- aa-- aa-- +ab--c ab--c ab--c ab--c +-x -x -x -x + +xn--1ch ≠ xn--1ch xn--1ch +xn--dqd20apc ᄎᆞᆷ xn--dqd20apc xn--dqd20apc +xn--gdh ≮ xn--gdh xn--gdh +xn--80aaa0ahbbeh4c йайзаакпий xn--80aaa0ahbbeh4c xn--80aaa0ahbbeh4c +xn--3bs854c 团淄 xn--3bs854c xn--3bs854c +xn--mgb9awbf عمان xn--mgb9awbf xn--mgb9awbf +xn--mgbaam7a8h امارات xn--mgbaam7a8h xn--mgbaam7a8h +xn--mgbbh1a71e بھارت xn--mgbbh1a71e xn--mgbbh1a71e +xn--s7y.com 短.com xn--s7y.com xn--s7y.com 
+xn--55qx5d.xn--tckwe 公司.コム xn--55qx5d.xn--tckwe xn--55qx5d.xn--tckwe +xn--4dbrk0ce ישראל xn--4dbrk0ce xn--4dbrk0ce +xn--zckzah テスト xn--zckzah xn--zckzah +xn--p1ai.com рф.com xn--p1ai.com xn--p1ai.com +xn--mxahbxey0c.gr εχαμπλε.gr xn--mxahbxey0c.gr xn--mxahbxey0c.gr +xn--h2brj9c भारत xn--h2brj9c xn--h2brj9c +xn--d1acpjx3f.xn--p1ai яндекс.рф xn--d1acpjx3f.xn--p1ai xn--d1acpjx3f.xn--p1ai +xn--q9jyb4c みんな xn--q9jyb4c xn--q9jyb4c +xn--sterreich-z7a.at österreich.at xn--sterreich-z7a.at xn--sterreich-z7a.at +xn--h2breg3eve.xn--h2brj9c भारतम्.भारत xn--h2breg3eve.xn--h2brj9c xn--h2breg3eve.xn--h2brj9c +ejemplo.xn--q9jyb4c ejemplo.みんな ejemplo.xn--q9jyb4c ejemplo.xn--q9jyb4c +xn--9t4b11yi5a.com 테스트.com xn--9t4b11yi5a.com xn--9t4b11yi5a.com +xn--gk3at1e.com 通販.com xn--gk3at1e.com xn--gk3at1e.com +xn--42c2d9a คอม xn--42c2d9a xn--42c2d9a +1xn-- 1xn-- 1xn-- 1xn-- +xn--bih.com ⌘.com xn--bih.com xn--bih.com +xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c موقع.وزارة-الأتصالات.مصر xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c +xn--mgbb9fbpob موبايلي xn--mgbb9fbpob xn--mgbb9fbpob +xn--55qw42g.xn--55qw42g 公益.公益 xn--55qw42g.xn--55qw42g xn--55qw42g.xn--55qw42g +≠ ≠ xn--1ch xn--1ch +ファッション.biz ファッション.biz xn--bck1b9a5dre4c.biz xn--bck1b9a5dre4c.biz +-- Special cases +---- Empty input + + + +---- NULL input +\N +\N +\N +---- Garbage inputs for idnaEncode + + + + +---- Long input +Row 1: +────── +idna: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +ascii: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... 
vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk. +ascii_try: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk. +original: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen. +original_try: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... 
an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen. +---- Non-const input + +münchen xn--mnchen-3ya xn--mnchen-3ya münchen münchen +straße.münchen.de xn--strae-oqa.xn--mnchen-3ya.de xn--strae-oqa.xn--mnchen-3ya.de straße.münchen.de straße.münchen.de +---- Non-const input with invalid values sprinkled in +london.co.uk london.co.uk london.co.uk +microsoft.com microsoft.com microsoft.com +xn-- +xn-- +xn--tešla +ytraße.münchen.de xn--ytrae-oqa.xn--mnchen-3ya.de ytraße.münchen.de diff --git a/tests/queries/0_stateless/02932_idna.sql b/tests/queries/0_stateless/02932_idna.sql new file mode 100644 index 00000000000..db7688064f2 --- /dev/null +++ b/tests/queries/0_stateless/02932_idna.sql @@ -0,0 +1,124 @@ +-- Tags: no-fasttest +-- no-fasttest: requires idna library + +-- See also 02932_punycode.sql + +SELECT '-- Negative tests'; + +SELECT idnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryIdnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT idnaDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT idnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tryIdnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT idnaDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT idnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryIdnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT idnaDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT idnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT tryIdnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT idnaDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } + +SELECT '-- Regular cases'; + +-- The test cases originate from the ada idna unit tests: +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_alternating.txt +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_unicode_alternating.txt +-- +SELECT 'straße.de' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '2001:4860:4860::8888' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'AMAZON' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'aa--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'a†--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'ab--c' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '-†' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '-x.xn--zca' AS idna, idnaEncode(idna) AS ascii, 
tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x-.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x-.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x..ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '128.0,0.1' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'xn--zca.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'xn--zca.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x01234567890123456789012345678901234567890123456789012345678901x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '≠' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; + +SELECT 'aa--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'ab--c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '-x' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--1ch' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--dqd20apc' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--gdh' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--80aaa0ahbbeh4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--3bs854c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgb9awbf' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, 
tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgbaam7a8h' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgbbh1a71e' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--s7y.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--55qx5d.xn--tckwe' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--4dbrk0ce' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--zckzah' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--p1ai.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mxahbxey0c.gr' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--d1acpjx3f.xn--p1ai' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--sterreich-z7a.at' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--h2breg3eve.xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'ejemplo.xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--9t4b11yi5a.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--gk3at1e.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--42c2d9a' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '1xn--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--bih.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgbb9fbpob' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--55qw42g.xn--55qw42g' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '≠' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'ファッション.biz' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +-- +SELECT '-- Special cases'; + +SELECT '---- Empty input'; +SELECT idnaEncode(''); +SELECT tryIdnaEncode(''); +SELECT idnaDecode(''); + +SELECT '---- NULL 
input'; +SELECT idnaEncode(NULL); +SELECT tryIdnaEncode(NULL); +SELECT idnaDecode(NULL); + +SELECT '---- Garbage inputs for idnaEncode'; +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_invalid.txt +SELECT idnaEncode('xn--'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('xn--'); +SELECT idnaEncode('ﻱa'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('ﻱa'); +SELECT idnaEncode('xn--a-yoc'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('xn--a-yoc'); +SELECT idnaEncode('xn--tešla'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('xn--tešla'); + +SELECT '---- Long input'; +SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FORMAT Vertical; + +SELECT '---- Non-const input'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna; +INSERT INTO tab VALUES ('straße.münchen.de') ('') ('münchen'); +SELECT idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FROM tab; +DROP TABLE tab; + +SELECT '---- Non-const input with invalid values sprinkled in'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna; +INSERT INTO tab VALUES ('xn--') ('london.co.uk') ('ytraße.münchen.de') ('xn--tešla') ('microsoft.com') ('xn--'); +SELECT idna, idnaEncode(idna) AS ascii FROM tab; -- { serverError BAD_ARGUMENTS } +SELECT idna, tryIdnaEncode(idna) AS ascii, idnaDecode(ascii) AS original FROM tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference new file mode 100644 index 00000000000..ff05eaa72a3 --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.reference @@ -0,0 +1,55 @@ +-- Negative tests +-- Regular cases +a a- a a +A A- A A +-- --- -- -- +London London- London London +Lloyd-Atkinson Lloyd-Atkinson- Lloyd-Atkinson Lloyd-Atkinson +This has spaces This has spaces- This has spaces This has spaces +-> $1.00 <- -> $1.00 <-- -> $1.00 <- -> $1.00 <- +а 80a а а +ü tda ü ü +α mxa α α +例 fsq 例 例 +😉 n28h 😉 😉 +αβγ mxacd αβγ αβγ +München Mnchen-3ya München München +Mnchen-3ya Mnchen-3ya- Mnchen-3ya Mnchen-3ya +München-Ost Mnchen-Ost-9db München-Ost München-Ost +Bahnhof München-Ost Bahnhof Mnchen-Ost-u6b Bahnhof München-Ost 
Bahnhof München-Ost +abæcdöef abcdef-qua4k abæcdöef abæcdöef +правда 80aafi6cg правда правда +ยจฆฟคฏข 22cdfh1b8fsa ยจฆฟคฏข ยจฆฟคฏข +ドメイン名例 eckwd4c7cu47r2wf ドメイン名例 ドメイン名例 +MajiでKoiする5秒前 MajiKoi5-783gue6qz075azm5e MajiでKoiする5秒前 MajiでKoiする5秒前 +「bücher」 bcher-kva8445foa 「bücher」 「bücher」 +团淄 3bs854c 团淄 团淄 +-- Special cases +---- Empty input + + + +---- NULL input +\N +\N +\N +---- Garbage Punycode-encoded input + +---- Long input +Row 1: +────── +str: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +puny: Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa +original: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. 
Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +original_try: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +---- Non-const values +München Mnchen-3ya München München +abc abc- abc abc +aäoöuü aou-qla5gqb aäoöuü aäoöuü +---- Non-const values with invalid values sprinkled in +Also no punycode +London- London +Mnchen-3ya München +No punycode +Rtting-3ya Rütting +XYZ no punycode diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql new file mode 100644 index 00000000000..b9bcf933641 --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.sql @@ -0,0 +1,86 @@ +-- Tags: no-fasttest +-- no-fasttest: requires idna library + +-- See also 02932_idna.sql + +SELECT '-- Negative tests'; + +SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryPunycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tryPunycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryPunycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT tryPunycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } + +SELECT '-- Regular cases'; + +-- The test cases originate from the ada idna unit tests: +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt + +SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 
'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +-- +SELECT '-- Special cases'; + +SELECT '---- Empty input'; +SELECT punycodeEncode(''); +SELECT punycodeDecode(''); +SELECT tryPunycodeDecode(''); + +SELECT '---- NULL input'; +SELECT punycodeEncode(NULL); +SELECT punycodeDecode(NULL); +SELECT tryPunycodeDecode(NULL); + +SELECT '---- Garbage Punycode-encoded input'; +SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS } +SELECT tryPunycodeDecode('no punycode'); + +SELECT '---- Long input'; +SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... 
Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FORMAT Vertical; + +SELECT '---- Non-const values'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str; +INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München'); +SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FROM tab; +DROP TABLE tab; + +SELECT '---- Non-const values with invalid values sprinkled in'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (puny String) ENGINE=MergeTree ORDER BY puny; +INSERT INTO tab VALUES ('Also no punycode') ('London-') ('Mnchen-3ya') ('No punycode') ('Rtting-3ya') ('XYZ no punycode'); +SELECT puny, punycodeDecode(puny) AS original FROM tab; -- { serverError BAD_ARGUMENTS } +SELECT puny, tryPunycodeDecode(puny) AS original FROM tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02933_paste_join.reference b/tests/queries/0_stateless/02933_paste_join.reference index 84ae5987926..5ff13917957 100644 --- a/tests/queries/0_stateless/02933_paste_join.reference +++ b/tests/queries/0_stateless/02933_paste_join.reference @@ -72,3 +72,13 @@ UInt64 UInt64 UInt64 UInt64 +0 9 +1 8 +2 7 +3 6 +4 5 +5 4 +6 3 +7 2 +8 1 +9 0 diff --git a/tests/queries/0_stateless/02933_paste_join.sql b/tests/queries/0_stateless/02933_paste_join.sql index 1c346438d77..b103bf72160 100644 --- a/tests/queries/0_stateless/02933_paste_join.sql +++ b/tests/queries/0_stateless/02933_paste_join.sql @@ -32,6 +32,6 @@ INSERT INTO t2 SELECT number, number FROM numbers(15, 15); SELECT * FROM ( SELECT * from t1 ) t1 PASTE JOIN ( SELECT * from t2 ) t2 SETTINGS max_threads = 1; SELECT toTypeName(a) FROM (SELECT number as a FROM numbers(11)) t1 PASTE JOIN (select number as a from numbers(10)) t2 SETTINGS join_use_nulls = 1; SET max_threads = 2; +select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=10; select * from (SELECT number as a FROM numbers(10)) t1 ANY PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } select * from (SELECT number as a FROM numbers(10)) t1 ALL PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } -select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=3; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02933_sqid.reference b/tests/queries/0_stateless/02933_sqid.reference index 4506cc4d01a..a559bacb0ac 100644 --- a/tests/queries/0_stateless/02933_sqid.reference +++ b/tests/queries/0_stateless/02933_sqid.reference @@ -1,13 +1,17 @@ -- negative tests -- const UInt* 
-Uk -XMbT -86Rf07 -Td1EnWQo +Uk [1] +XMbT [1,2] +86Rf07 [1,2,3] +Td1EnWQo [1,2,3,4] XMbT -- non-const UInt* -Uk +Uk [1] +XMbT [1,2] +86Rf07 [1,2,3] +Td1EnWQo [1,2,3,4] XMbT -86Rf07 -Td1EnWQo +-- invalid sqid +[] +-- alias XMbT diff --git a/tests/queries/0_stateless/02933_sqid.sql b/tests/queries/0_stateless/02933_sqid.sql index 3a2873e9c34..81d4b2bc35c 100644 --- a/tests/queries/0_stateless/02933_sqid.sql +++ b/tests/queries/0_stateless/02933_sqid.sql @@ -3,19 +3,27 @@ SET allow_suspicious_low_cardinality_types = 1; SELECT '-- negative tests'; -SELECT sqid(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT sqid('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT sqidEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT sqidDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT sqidEncode('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT sqidDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT '-- const UInt*'; -SELECT sqid(1); -SELECT sqid(1, 2); -SELECT sqid(1, 2, 3); -SELECT sqid(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64); -SELECT sqid(toNullable(1), toLowCardinality(2)); +SELECT sqidEncode(1) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(1, 2) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(1, 2, 3) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(toNullable(1), toLowCardinality(2)) AS sqid; SELECT '-- non-const UInt*'; -SELECT sqid(materialize(1)); -SELECT sqid(materialize(1), materialize(2)); -SELECT sqid(materialize(1), materialize(2), materialize(3)); -SELECT sqid(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64)); -SELECT sqid(toNullable(materialize(1)), toLowCardinality(materialize(2))); +SELECT sqidEncode(materialize(1)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(materialize(1), materialize(2)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(materialize(1), materialize(2), materialize(3)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(toNullable(materialize(1)), toLowCardinality(materialize(2))); + +SELECT '-- invalid sqid'; +SELECT sqidDecode('invalid sqid'); + +SELECT '-- alias'; +SELECT sqid(1, 2); diff --git a/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.reference b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.reference new file mode 100644 index 00000000000..a13ab627f94 --- /dev/null +++ b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.reference @@ -0,0 +1,9 @@ +1 a 1 +2 a 1 +3 b 1 +4 b 1 +1 \N 1 +2 \N 1 +3 \N 1 +4 \N 1 +\N \N 4 diff --git a/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.sql b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.sql new file mode 100644 index 00000000000..03bb7f9e770 --- /dev/null +++ b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS test_group_by_with_rollup_order; + +CREATE TABLE test_group_by_with_rollup_order (id Int64, a Nullable(Int64), b Nullable(String)) ENGINE = MergeTree ORDER BY id; + +insert into test_group_by_with_rollup_order values(1,1,'a'); +insert into test_group_by_with_rollup_order values(2,2,'a'); +insert into test_group_by_with_rollup_order values(3,3,'b'); +insert into test_group_by_with_rollup_order values(4,4,'b'); + +SELECT 
toString(a) as r1, b, count() FROM test_group_by_with_rollup_order GROUP BY r1, b WITH ROLLUP ORDER BY b,r1; + +DROP TABLE IF EXISTS test_group_by_with_rollup_order; diff --git a/tests/queries/0_stateless/02946_format_values.reference b/tests/queries/0_stateless/02946_format_values.reference new file mode 100644 index 00000000000..90b2a3cb8ef --- /dev/null +++ b/tests/queries/0_stateless/02946_format_values.reference @@ -0,0 +1,141 @@ +INSERT INTO table1 VALUES (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td') +====================================== +SELECT a +FROM table1 +; + +INSERT INTO table1 VALUES (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td'); + +SELECT b +FROM table1 +; + +====================================== +-- begin +SELECT a +FROM table1 +; + +-- some insert query +INSERT INTO table1 VALUES (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td'); + +-- more comments +-- in a row +SELECT b +FROM table1 +; + +-- end +====================================== +SELECT b FROM table1; + +SELECT b, c FROM table1; + +SELECT + b, + c, + d +FROM table1 +; + +SELECT + b, + c, + d, + e +FROM table1 +; + +SELECT + b, + c, + d, + e, + f +FROM table1 +; + +SELECT + b, + c +FROM +( + SELECT + b, + c + FROM table1 +) +; + +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +; + +====================================== +SELECT b FROM table1; + +SELECT b, c FROM table1; + +SELECT b, c, d FROM table1; + +SELECT b, c, d, e FROM table1; + +SELECT b, c, d, e, f FROM table1; + +SELECT b, c FROM (SELECT b, c FROM table1); + +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +; + +====================================== +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +SELECT b, c, d, e, f FROM (SELECT b, c, d, e, f FROM table1) +====================================== +Option 'max_line_length' must be less than 256. +2 +Options 'oneline' and 'max_line_length' are mutually exclusive. +2 diff --git a/tests/queries/0_stateless/02946_format_values.sh b/tests/queries/0_stateless/02946_format_values.sh new file mode 100755 index 00000000000..36e32de42fa --- /dev/null +++ b/tests/queries/0_stateless/02946_format_values.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "insert into table1 values (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td')" | ${CLICKHOUSE_FORMAT} + +echo "======================================" + +cat <&1; echo $?; } +{ echo "select 1" | ${CLICKHOUSE_FORMAT} --comments --max_line_length=120 --oneline 2>&1; echo $?; } diff --git a/tests/queries/0_stateless/02947_dropped_tables_parts.reference b/tests/queries/0_stateless/02947_dropped_tables_parts.reference new file mode 100644 index 00000000000..086d55c3d93 --- /dev/null +++ b/tests/queries/0_stateless/02947_dropped_tables_parts.reference @@ -0,0 +1,2 @@ +default 02947_table_1 all_1_1_0 +default 02947_table_2 all_1_1_0 diff --git a/tests/queries/0_stateless/02947_dropped_tables_parts.sql b/tests/queries/0_stateless/02947_dropped_tables_parts.sql new file mode 100644 index 00000000000..554a19ca6b1 --- /dev/null +++ b/tests/queries/0_stateless/02947_dropped_tables_parts.sql @@ -0,0 +1,14 @@ + +DROP TABLE IF EXISTS 02947_table_1; +DROP TABLE IF EXISTS 02947_table_2; + +CREATE TABLE 02947_table_1 (id Int32) Engine=MergeTree() ORDER BY id; +CREATE TABLE 02947_table_2 (id Int32) Engine=MergeTree() ORDER BY id; +INSERT INTO 02947_table_1 VALUES (1),(2); +INSERT INTO 02947_table_2 VALUES (3),(4); + +SELECT database, table, name FROM system.parts WHERE database = currentDatabase() AND startsWith(table, '02947_table_'); +select * from system.dropped_tables_parts format Null; + +DROP TABLE 02947_table_1; +DROP TABLE 02947_table_2; diff --git a/tests/queries/0_stateless/02951_inverted_index_support_match.reference b/tests/queries/0_stateless/02951_inverted_index_support_match.reference new file mode 100644 index 00000000000..9dc8d5b76d9 --- /dev/null +++ b/tests/queries/0_stateless/02951_inverted_index_support_match.reference @@ -0,0 +1,20 @@ +1 Hello ClickHouse +2 Hello World + Granules: 6/6 + Granules: 2/6 + Granules: 6/6 + Granules: 2/6 +--- +1 Hello ClickHouse +2 Hello World +6 World Champion + Granules: 6/6 + Granules: 3/6 + Granules: 6/6 + Granules: 3/6 +--- +5 OLAP Database + Granules: 6/6 + Granules: 1/6 + Granules: 6/6 + Granules: 1/6 diff --git a/tests/queries/0_stateless/02951_inverted_index_support_match.sql b/tests/queries/0_stateless/02951_inverted_index_support_match.sql new file mode 100644 index 00000000000..9ebf10412d9 --- /dev/null +++ b/tests/queries/0_stateless/02951_inverted_index_support_match.sql @@ -0,0 +1,105 @@ +SET allow_experimental_inverted_index = true; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + id UInt32, + str String, + INDEX inv_idx(str) TYPE inverted(0) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 1; + +INSERT INTO tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion'); + +SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id; + +-- Read 2/6 granules +-- Required string: 'Hello ' +-- Alternatives: 'Hello ClickHouse', 'Hello World' + +SELECT * +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; + +SELECT '---'; + +SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id; + +-- Read 3/6 granules +-- Required string: 
- +-- Alternatives: 'ClickHouse', 'World' + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; + +SELECT '---'; + +SELECT * FROM tab WHERE match(str, 'OLAP.*') ORDER BY id; + +-- Read 1/6 granules +-- Required string: 'OLAP' +-- Alternatives: - + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference new file mode 100644 index 00000000000..d3a002c4fd4 --- /dev/null +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -0,0 +1,4 @@ +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.000 +2023-10-09 10:11:12.000 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql new file mode 100644 index 00000000000..178f21a9e63 --- /dev/null +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql @@ -0,0 +1,4 @@ +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000500', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000499', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(10)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02960_alter_table_part_query_parameter.reference b/tests/queries/0_stateless/02960_alter_table_part_query_parameter.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02960_alter_table_part_query_parameter.sql b/tests/queries/0_stateless/02960_alter_table_part_query_parameter.sql new file mode 100644 index 00000000000..31d3ae10271 --- /dev/null +++ b/tests/queries/0_stateless/02960_alter_table_part_query_parameter.sql @@ -0,0 +1,15 @@ +drop table if exists data; +create table data (key Int) engine=MergeTree() order by key; + +insert into data values (1); + +set param_part='all_1_1_0'; +alter table data detach part {part:String}; +alter table data attach part {part:String}; +set param_part='all_2_2_0'; +alter table data detach part {part:String}; +alter table data drop detached part {part:String} settings allow_drop_detached=1; + +insert into data values (2); +set param_part='all_3_3_0'; +alter table data drop part {part:String}; diff --git a/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.reference b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.reference new file mode 100644 index 00000000000..bbb479df203 --- /dev/null +++ b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.reference @@ -0,0 +1,10 @@ 
+(0,'2024-01-01') +(0,'2024-01-02') +(0,'2024-01-03') +(0,'2024-01-04') +(0,'2024-01-05') +(0,'2024-01-06') +(0,'2024-01-07') +(0,'2024-01-08') +(0,'2024-01-09') +(0,'2024-01-10') diff --git a/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.sql b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.sql new file mode 100644 index 00000000000..8836d10b8f8 --- /dev/null +++ b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.sql @@ -0,0 +1,19 @@ +set allow_suspicious_low_cardinality_types = true; + +CREATE TABLE test_tuple_filter__fuzz_2 +( + `id` Nullable(UInt32), + `value` LowCardinality(String), + `log_date` LowCardinality(Date) +) +ENGINE = MergeTree +PARTITION BY log_date +ORDER BY id +SETTINGS allow_nullable_key = 1; + +INSERT INTO test_tuple_filter__fuzz_2 SELECT number, toString(number), toDate('2024-01-01') + number FROM numbers(10); + +SELECT + (tuple(log_date) = tuple('2021-01-01'), log_date) +FROM test_tuple_filter__fuzz_2 +ORDER BY log_date; diff --git a/tests/queries/0_stateless/02961_output_format_compress_params.reference b/tests/queries/0_stateless/02961_output_format_compress_params.reference new file mode 100644 index 00000000000..d0752a77fc7 --- /dev/null +++ b/tests/queries/0_stateless/02961_output_format_compress_params.reference @@ -0,0 +1,2 @@ +1 +1000000 diff --git a/tests/queries/0_stateless/02961_output_format_compress_params.sh b/tests/queries/0_stateless/02961_output_format_compress_params.sh new file mode 100755 index 00000000000..7275f9a0b2b --- /dev/null +++ b/tests/queries/0_stateless/02961_output_format_compress_params.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: replica + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +file_with_random_postfix=test_02961_`date +%s%6N`.csv + +${CLICKHOUSE_CLIENT} --query "INSERT INTO FUNCTION file('${file_with_random_postfix}', 'CSV', 'x UInt64', 'zstd') SELECT number FROM numbers(1000000) SETTINGS output_format_compression_level = 10, output_format_compression_zstd_window_log = 30, engine_file_truncate_on_insert = 1;" +# Simple check that output_format_compression_zstd_window_log = 30 works +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM file('${file_with_random_postfix}', 'CSV', 'x UInt64', 'zstd') SETTINGS zstd_window_log_max = 29;" 2>&1 | head -n 1 | grep -c "ZSTD_DECODER_FAILED" +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM file('${file_with_random_postfix}', 'CSV', 'x UInt64', 'zstd') SETTINGS zstd_window_log_max = 30;" diff --git a/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql new file mode 100644 index 00000000000..669018a1308 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql @@ -0,0 +1,8 @@ +set count_distinct_optimization = 1; + +SELECT uniqExact('257') +FROM + (SELECT + number, CAST(number / 9223372036854775806, 'UInt64') AS m + FROM numbers(3) + ); diff --git a/tests/queries/0_stateless/02962_analyzer_constant_set.reference b/tests/queries/0_stateless/02962_analyzer_constant_set.reference new file mode 100644 index 
00000000000..ec635144f60 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_constant_set.reference @@ -0,0 +1 @@ +9 diff --git a/tests/queries/0_stateless/02962_analyzer_constant_set.sql b/tests/queries/0_stateless/02962_analyzer_constant_set.sql new file mode 100644 index 00000000000..aae2f1c0145 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_constant_set.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test_parallel_index; + +CREATE TABLE test_parallel_index +( + z UInt64, + INDEX i z TYPE set(8) +) +ENGINE = MergeTree +ORDER BY (); + +insert into test_parallel_index select number from numbers(10); + +select sum(z) from test_parallel_index where z = 2 or z = 7 or z = 13 or z = 17 or z = 19 or z = 23; + +DROP TABLE test_parallel_index; diff --git a/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.reference b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.reference new file mode 100644 index 00000000000..05e8b3e24fc --- /dev/null +++ b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.reference @@ -0,0 +1,4 @@ +lc: dictionary not null +lc: dictionary not null +lc: dictionary not null +lc: dictionary not null diff --git a/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.sh b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.sh new file mode 100755 index 00000000000..467c98b47cf --- /dev/null +++ b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.arrow + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=1, output_format_arrow_use_64_bit_indexes_for_dictionary=1" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=1, output_format_arrow_use_64_bit_indexes_for_dictionary=0" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=0, output_format_arrow_use_64_bit_indexes_for_dictionary=1" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=0, output_format_arrow_use_64_bit_indexes_for_dictionary=0" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02962_join_using_bug_57894.reference b/tests/queries/0_stateless/02962_join_using_bug_57894.reference new file mode 100644 index 00000000000..454655081df --- /dev/null +++ 
b/tests/queries/0_stateless/02962_join_using_bug_57894.reference @@ -0,0 +1,66 @@ +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N diff --git a/tests/queries/0_stateless/02962_join_using_bug_57894.sql b/tests/queries/0_stateless/02962_join_using_bug_57894.sql new file mode 100644 index 00000000000..87aef8b1a71 --- /dev/null +++ b/tests/queries/0_stateless/02962_join_using_bug_57894.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS r; +SET allow_suspicious_low_cardinality_types = 1; + +CREATE TABLE t (`x` UInt32, `s` LowCardinality(String)) ENGINE = Memory; +INSERT INTO t SELECT number, toString(number) FROM numbers(5); + +CREATE TABLE r (`x` LowCardinality(Nullable(UInt32)), `s` Nullable(String)) ENGINE = Memory; +INSERT INTO r SELECT number, toString(number) FROM numbers(2, 8); +INSERT INTO r VALUES (NULL, NULL); + +SET allow_experimental_analyzer = 0; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +; + + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'partial_merge'; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'full_sorting_merge'; + +SET allow_experimental_analyzer = 1; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'partial_merge'; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'full_sorting_merge'; diff --git a/tests/queries/0_stateless/02962_max_joined_block_rows.reference b/tests/queries/0_stateless/02962_max_joined_block_rows.reference new file mode 100644 index 00000000000..8bc1bad225b --- /dev/null +++ b/tests/queries/0_stateless/02962_max_joined_block_rows.reference @@ -0,0 +1,32 @@ +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +-- +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +-- +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 diff --git a/tests/queries/0_stateless/02962_max_joined_block_rows.sql b/tests/queries/0_stateless/02962_max_joined_block_rows.sql new file mode 100644 index 00000000000..c31ab5e1132 --- /dev/null +++ b/tests/queries/0_stateless/02962_max_joined_block_rows.sql @@ -0,0 +1,38 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE table t1 (a UInt64, b UInt64) ENGINE = Memory; +INSERT INTO t1 SELECT number % 2, number FROM numbers(10); + +CREATE table t2 (a UInt64) ENGINE = Memory; + +INSERT INTO t2 SELECT number % 2 FROM numbers(10); + +-- block size is always multiple of 5 because we have 5 rows for each key in right table +-- we do not split rows corresponding to the same key + +SELECT max(bs) <= 5, b FROM ( + SELECT blockSize() as bs, * FROM t1 JOIN t2 ON t1.a = t2.a +) GROUP BY b +ORDER BY b +SETTINGS max_joined_block_size_rows = 5; + +SELECT '--'; + +SELECT max(bs) <= 10, b FROM ( + SELECT blockSize() as bs, * FROM t1 JOIN t2 ON t1.a = t2.a +) GROUP BY b +ORDER BY b +SETTINGS max_joined_block_size_rows = 10; + +SELECT '--'; + +-- parallel_hash doesn't support max_joined_block_size_rows + +SET join_algorithm = 'parallel_hash'; + +SELECT max(bs) > 10, b FROM ( + SELECT blockSize() as bs, * FROM t1 JOIN t2 ON t1.a = t2.a +) GROUP BY b +ORDER BY b +SETTINGS max_joined_block_size_rows = 10; diff --git a/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference
b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference new file mode 100644 index 00000000000..f18a39e191e --- /dev/null +++ b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference @@ -0,0 +1,18 @@ +sales 15000 +sales 15000 +sales 15000 +sales 29400 +sales 29400 +sales 29400 +sales 43800 +sales 43800 +sales 43800 +sales 15000 5000 +sales 15000 5000 +sales 15000 5000 +sales 29400 4800 +sales 29400 4800 +sales 29400 4800 +sales 43800 4800 +sales 43800 4800 +sales 43800 4800 diff --git a/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql new file mode 100644 index 00000000000..90af415c5ea --- /dev/null +++ b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql @@ -0,0 +1,32 @@ +CREATE TABLE empsalary +( + `depname` LowCardinality(String), + `empno` UInt64, + `salary` Int32, + `enroll_date` Date +) +ENGINE = Memory; + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + +-- 1 window function + +SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) AS depsalary +FROM empsalary +order by depsalary; + + +-- 2 window functions with different window, +-- but result should be the same for depsalary + +SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) AS depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) AS depminsalary +FROM empsalary +order by depsalary; diff --git a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.reference b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.reference new file mode 100644 index 00000000000..42d5519df66 --- /dev/null +++ b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.reference @@ -0,0 +1,4 @@ +Replication did not hang: synced all replicas of test_table_ +Consistency: 1 +Test completed +Data consistency check passed diff --git a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh new file mode 100755 index 00000000000..f47801abf73 --- /dev/null +++ b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. 
"$CURDIR"/replication.lib + +TOTAL_REPLICAS=10 +REPLICAS_TO_DROP=7 +export TOTAL_REPLICAS +export REPLICAS_TO_DROP + +for i in $(seq $TOTAL_REPLICAS); do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table_$i" + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_table_$i (key UInt64, value UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_table', '$i') ORDER BY key" +done + +function insert_thread() { + while true; do + REPLICA=$(($RANDOM % $TOTAL_REPLICAS + 1)) + $CLICKHOUSE_CLIENT --query "INSERT INTO test_table_$REPLICA VALUES ($RANDOM, $RANDOM % 255)" + sleep 0.$RANDOM + done +} + +function sync_and_drop_replicas() { + while true; do + for i in $(seq $REPLICAS_TO_DROP); do + local stable_replica_id=$((i + 1)) + $CLICKHOUSE_CLIENT --query "ALTER TABLE test_table_$i MODIFY SETTING parts_to_throw_insert = 0" + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_table_$stable_replica_id LIGHTWEIGHT FROM '$i'" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table_$i" + done + + for i in $(seq $REPLICAS_TO_DROP); do + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_table_$i (key UInt64, value UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_table', '$i') ORDER BY key" + done + done +} + +function optimize_thread() { + while true; do + REPLICA=$(($RANDOM % $TOTAL_REPLICAS + 1)) + $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE test_table_$REPLICA FINAL" + sleep 0.$RANDOM + done +} + +function mutations_thread() { + while true; do + REPLICA=$(($RANDOM % $TOTAL_REPLICAS + 1)) + CONDITION="key % 2 = 0" + $CLICKHOUSE_CLIENT --query "ALTER TABLE test_table_$REPLICA DELETE WHERE $CONDITION" + sleep 0.$RANDOM + done +} + +export -f insert_thread +export -f sync_and_drop_replicas +export -f optimize_thread +export -f mutations_thread + +TIMEOUT=60 + +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c sync_and_drop_replicas 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout $TIMEOUT bash -c mutations_thread 2> /dev/null & + +wait + +check_replication_consistency "test_table_" "count(), sum(key), sum(value)" + +echo "Test completed" + +lost_parts_count=$($CLICKHOUSE_CLIENT --query "SELECT SUM(lost_part_count) FROM system.replicas WHERE database=currentDatabase()") +if [ "$lost_parts_count" -ne 0 ]; then + echo "Data consistency check failed: lost parts count is not zero" + exit 1 +fi + +echo "Data consistency check passed" + +for i in $(seq $TOTAL_REPLICAS); do + if [ $i -gt $REPLICAS_TO_DROP ]; then + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table_$i" + fi +done \ No newline at end of file diff --git a/tests/queries/0_stateless/02963_invalid_identifier.reference b/tests/queries/0_stateless/02963_invalid_identifier.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02963_invalid_identifier.sql b/tests/queries/0_stateless/02963_invalid_identifier.sql new file mode 100644 index 00000000000..64a52364baa --- /dev/null +++ b/tests/queries/0_stateless/02963_invalid_identifier.sql @@ -0,0 +1 @@ +SELECT t.t.t.* FROM system.tables WHERE database = currentDatabase(); --{serverError INVALID_IDENTIFIER} diff --git a/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.reference b/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.sql 
b/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.sql new file mode 100644 index 00000000000..a3a8bd0624a --- /dev/null +++ b/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.sql @@ -0,0 +1,2 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/58727 +SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even; -- { serverError AGGREGATE_FUNCTION_THROW} diff --git a/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.reference b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.sh b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.sh new file mode 100755 index 00000000000..24fe964b824 --- /dev/null +++ b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +disk_name="02963_remote_read_bug" + +$CLICKHOUSE_CLIENT -nm --query " +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a Int32, s String) +ENGINE = MergeTree() +ORDER BY a +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '10Gi', path = '$disk_name', disk = 's3_disk'); + +INSERT INTO test SELECT number % 1000000, randomString(1) FROM numbers_mt(1e7) SETTINGS enable_filesystem_cache_on_write_operations = 0; + +OPTIMIZE TABLE test FINAL; +" + +query_id=$(random_str 10) + +$CLICKHOUSE_CLIENT -nm --query_id "$query_id" --query " +WITH RANDOM_SET AS ( + SELECT rand32() % 10000 FROM numbers(100) +) +SELECT * +FROM test +WHERE a IN RANDOM_SET AND s IN ('x', 'y', 'z') +FORMAT Null +SETTINGS + max_threads = 10, + allow_prefetched_read_pool_for_remote_filesystem = 1, filesystem_prefetch_min_bytes_for_single_read_task = '1Ki', + merge_tree_min_bytes_for_concurrent_read = 1, merge_tree_min_rows_for_concurrent_read = 1, + merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem = 1, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 1; +" + +$CLICKHOUSE_CLIENT -nm --query " +SYSTEM FLUSH LOGS; + +-- This threshold was determined experimentally - before the fix this ratio had values around 50K +SELECT throwIf(ProfileEvents['WriteBufferFromFileDescriptorWriteBytes'] / ProfileEvents['WriteBufferFromFileDescriptorWrite'] < 200000) +FROM system.query_log +WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type = 'QueryFinish'; +" + diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.reference b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql new file mode 100644 index 00000000000..552291b2f83 --- /dev/null +++ b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql @@ -0,0 +1,56 @@ +-- Tags: no-fasttest + +drop table if exists test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test1', type = object_storage, object_storage_type = local_blob_storage, path='./02963_test1/'); + +drop table test; +create table test (a Int32) 
engine = MergeTree() order by tuple() +settings disk='s3_disk_02963'; + +drop table test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test1', + type = object_storage, + object_storage_type = s3, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); + +drop table test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test2', + type = object_storage, + object_storage_type = s3, + metadata_storage_type = local, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); + +drop table test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test3', + type = object_storage, + object_storage_type = s3, + metadata_type = lll, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test4', + type = object_storage, + object_storage_type = kkk, + metadata_type = local, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test5', + type = kkk, + object_storage_type = s3, + metadata_type = local, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } diff --git a/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference b/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference new file mode 100644 index 00000000000..5816b4eb49b --- /dev/null +++ b/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference @@ -0,0 +1 @@ +3 4 diff --git a/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql b/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql new file mode 100644 index 00000000000..92f7cc0671c --- /dev/null +++ b/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql @@ -0,0 +1,9 @@ +drop table if exists a; + +create table a (i int, j int, projection p (select * order by j)) engine MergeTree partition by i order by tuple() settings index_granularity = 1; + +insert into a values (1, 2), (0, 5), (3, 4); + +select * from a where i > 0 and j = 4 settings force_index_by_date = 1; + +drop table a; diff --git a/tests/queries/0_stateless/02966_float32_promotion.reference b/tests/queries/0_stateless/02966_float32_promotion.reference new file mode 100644 index 00000000000..086e9795679 --- /dev/null +++ b/tests/queries/0_stateless/02966_float32_promotion.reference @@ -0,0 +1 @@ +49.9 diff --git a/tests/queries/0_stateless/02966_float32_promotion.sql b/tests/queries/0_stateless/02966_float32_promotion.sql new file mode 100644 index 00000000000..df687ca5338 --- /dev/null +++ b/tests/queries/0_stateless/02966_float32_promotion.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/58680 +DROP TABLE IF EXISTS f32_table; +CREATE TABLE f32_table (my_field Float32) ENGINE=Memory(); +INSERT INTO f32_table values ('49.9'); +SELECT * FROM f32_table where my_field = '49.9'; +DROP TABLE f32_table; diff --git 
a/tests/queries/0_stateless/02967_mysql_settings_override.reference b/tests/queries/0_stateless/02967_mysql_settings_override.reference new file mode 100644 index 00000000000..f464935306b --- /dev/null +++ b/tests/queries/0_stateless/02967_mysql_settings_override.reference @@ -0,0 +1,14 @@ +-- Uppercase tests +name value +limit 11 +name value +send_timeout 22 +name value +receive_timeout 33 +-- Lowercase tests +name value +limit 44 +name value +send_timeout 55 +name value +receive_timeout 66 diff --git a/tests/queries/0_stateless/02967_mysql_settings_override.sh b/tests/queries/0_stateless/02967_mysql_settings_override.sh new file mode 100755 index 00000000000..2c569c13696 --- /dev/null +++ b/tests/queries/0_stateless/02967_mysql_settings_override.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: requires mysql client + +# Tests the override of certain MySQL proprietary settings to ClickHouse native settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +CHANGED_SETTINGS_QUERY="SELECT name, value FROM system.settings WHERE name IN ('limit', 'send_timeout', 'receive_timeout') AND changed;" + +echo "-- Uppercase tests" +${MYSQL_CLIENT} --execute "SET SQL_SELECT_LIMIT = 11; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "SET NET_WRITE_TIMEOUT = 22; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "SET NET_READ_TIMEOUT = 33; $CHANGED_SETTINGS_QUERY" + +echo "-- Lowercase tests" +${MYSQL_CLIENT} --execute "set sql_select_limit=44; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "set net_write_timeout=55; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "set net_read_timeout=66; $CHANGED_SETTINGS_QUERY" diff --git a/tests/queries/1_stateful/00061_storage_buffer.sql b/tests/queries/1_stateful/00061_storage_buffer.sql index e3cda3de36d..220f4fb3686 100644 --- a/tests/queries/1_stateful/00061_storage_buffer.sql +++ b/tests/queries/1_stateful/00061_storage_buffer.sql @@ -1,7 +1,13 @@ DROP TABLE IF EXISTS test.hits_dst; DROP TABLE IF EXISTS test.hits_buffer; -CREATE TABLE test.hits_dst AS test.hits; +CREATE TABLE test.hits_dst AS test.hits +ENGINE = MergeTree +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID)) +SAMPLE BY intHash32(UserID) +SETTINGS storage_policy = 'default'; + CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 600, 600, 1000000, 1000000, 100000000, 1000000000); INSERT INTO test.hits_buffer SELECT * FROM test.hits WHERE CounterID = 800784; diff --git a/tests/queries/1_stateful/00152_insert_different_granularity.reference b/tests/queries/1_stateful/00152_insert_different_granularity.reference index c573f1c3072..209e3ef4b62 100644 --- a/tests/queries/1_stateful/00152_insert_different_granularity.reference +++ b/tests/queries/1_stateful/00152_insert_different_granularity.reference @@ -1,2 +1 @@ -8873918 -8873998 +20 diff --git a/tests/queries/1_stateful/00152_insert_different_granularity.sql b/tests/queries/1_stateful/00152_insert_different_granularity.sql index 294d71b384b..b5b3bc18231 100644 --- a/tests/queries/1_stateful/00152_insert_different_granularity.sql +++ b/tests/queries/1_stateful/00152_insert_different_granularity.sql @@ -5,54 +5,18 @@ DROP TABLE IF EXISTS fixed_granularity_table; CREATE TABLE fixed_granularity_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` 
FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, index_granularity_bytes=0, min_bytes_for_wide_part = 0; -- looks like default table before update -ALTER TABLE fixed_granularity_table REPLACE PARTITION 201403 FROM test.hits; - INSERT INTO fixed_granularity_table SELECT * FROM test.hits LIMIT 10; -- should still have non adaptive granularity - INSERT INTO fixed_granularity_table SELECT * FROM test.hits LIMIT 10; -- We have removed testing of OPTIMIZE because it's too 
heavy on very slow builds (debug + coverage + thread fuzzer with sleeps) -- OPTIMIZE TABLE fixed_granularity_table FINAL; -- and even after optimize DETACH TABLE fixed_granularity_table; - ATTACH TABLE fixed_granularity_table; ALTER TABLE fixed_granularity_table DETACH PARTITION 201403; - ALTER TABLE fixed_granularity_table ATTACH PARTITION 201403; SELECT count() from fixed_granularity_table; DROP TABLE IF EXISTS fixed_granularity_table; - -ALTER TABLE test.hits DETACH PARTITION 201403; - -ALTER TABLE test.hits ATTACH PARTITION 201403; - -DROP TABLE IF EXISTS hits_copy; - -CREATE TABLE hits_copy (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), 
`ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) - ENGINE = MergeTree() - PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) - SAMPLE BY intHash32(UserID) - SETTINGS index_granularity=8192, min_bytes_for_wide_part = 0; - -ALTER TABLE hits_copy REPLACE PARTITION 201403 FROM test.hits; - --- It's important to test table, which were created before server update -INSERT INTO test.hits SELECT * FROM hits_copy LIMIT 100; - -ALTER TABLE test.hits DETACH PARTITION 201403; - -ALTER TABLE test.hits ATTACH PARTITION 201403; - --- OPTIMIZE TABLE test.hits; - -SELECT count() FROM test.hits; - --- restore hits -ALTER TABLE test.hits REPLACE PARTITION 201403 FROM hits_copy; - -DROP TABLE IF EXISTS hits_copy; diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index 15bd4cbe6d4..3621ff82126 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -1,7 +1,14 @@ -- Tags: no-tsan, no-parallel DROP TABLE IF EXISTS test.hits_1m; -CREATE TABLE test.hits_1m as test.hits; + +CREATE TABLE test.hits_1m AS test.hits +ENGINE = MergeTree +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID)) +SAMPLE BY intHash32(UserID) +SETTINGS storage_policy = 'default'; + INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; CREATE DATABASE IF NOT EXISTS db_dict; diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index d1f868eba97..484dd46a90c 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -1,173 +1,98 @@ +#include +#include +#include #include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static DB::MySQLCharsetPtr charset = std::make_shared(); -static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( - DB::MySQLReplication::EventHeader & header, DB::ReadBuffer & payload, - std::shared_ptr & last_table_map_event, bool exist_checksum) +bool quit = false; +void signal_handler(int) { + quit = true; +} + +static void processBinlogFromFile(const std::string & bin_path, bool disable_checksum) +{ + DB::MySQLReplication::BinlogFromFile binlog; + binlog.open(bin_path); + binlog.setChecksum(disable_checksum ? DB::MySQLReplication::IBinlog::NONE : DB::MySQLReplication::IBinlog::CRC32); + DB::MySQLReplication::BinlogEventPtr event; - DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, - /* trow_exception */ false, /* exact_limit */ std::nullopt); - DB::ReadBufferPtr event_payload = std::make_shared(*limit_read_buffer, exist_checksum ? 
4 : 0); - - switch (header.type) + while (binlog.tryReadEvent(event, /*timeout*/ 0) && !quit) { - case DB::MySQLReplication::FORMAT_DESCRIPTION_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::ROTATE_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::QUERY_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - - auto query = std::static_pointer_cast(event); - switch (query->typ) - { - case DB::MySQLReplication::QUERY_EVENT_MULTI_TXN_FLAG: - case DB::MySQLReplication::QUERY_EVENT_XA: - { - event = std::make_shared(std::move(query->header)); - break; - } - default: - break; - } - break; - } - case DB::MySQLReplication::XID_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::TABLE_MAP_EVENT: - { - DB::MySQLReplication::TableMapEventHeader map_event_header; - map_event_header.parse(*event_payload); - event = std::make_shared(std::move(header), map_event_header, charset); - event->parseEvent(*event_payload); - last_table_map_event = std::static_pointer_cast(event); - break; - } - case DB::MySQLReplication::WRITE_ROWS_EVENT_V1: - case DB::MySQLReplication::WRITE_ROWS_EVENT_V2: - { - DB::MySQLReplication::RowsEventHeader rows_header(header.type); - rows_header.parse(*event_payload); - event = std::make_shared(last_table_map_event, std::move(header), rows_header); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::DELETE_ROWS_EVENT_V1: - case DB::MySQLReplication::DELETE_ROWS_EVENT_V2: - { - DB::MySQLReplication::RowsEventHeader rows_header(header.type); - rows_header.parse(*event_payload); - event = std::make_shared(last_table_map_event, std::move(header), rows_header); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::UPDATE_ROWS_EVENT_V1: - case DB::MySQLReplication::UPDATE_ROWS_EVENT_V2: - { - DB::MySQLReplication::RowsEventHeader rows_header(header.type); - rows_header.parse(*event_payload); - event = std::make_shared(last_table_map_event, std::move(header), rows_header); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::GTID_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - default: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } + DB::WriteBufferFromOStream cout(std::cout); + event->dump(cout); + binlog.getPosition().dump(cout); + cout.finalize(); } - - return event; } -static int checkBinLogFile(const std::string & bin_path, bool exist_checksum) +static void processBinlogFromSocket(const std::string & host, int port, const std::string & user, const std::string & password, const std::string & executed_gtid_set, bool disable_checksum) { - DB::ReadBufferFromFile in(bin_path); - DB::assertString("\xfe\x62\x69\x6e", in); /// magic number + DB::MySQLReplication::BinlogFromSocket binlog; + binlog.setChecksum(disable_checksum ? 
DB::MySQLReplication::IBinlog::NONE : DB::MySQLReplication::IBinlog::CRC32); - DB::MySQLReplication::BinlogEventPtr last_event; - std::shared_ptr last_header; - std::shared_ptr table_map; + binlog.connect(host, port, user, password); + binlog.start(/*unique number*/ 42, executed_gtid_set); + DB::MySQLReplication::BinlogEventPtr event; - try + while (!quit) { - while (!in.eof()) + if (binlog.tryReadEvent(event, /*timeout*/ 100)) { - last_header = std::make_shared(); - last_header->parse(in); - last_event = parseSingleEventBody(*last_header, in, table_map, exist_checksum); + if (event->header.type != DB::MySQLReplication::HEARTBEAT_EVENT) + { + DB::WriteBufferFromOStream cout(std::cout); + event->dump(cout); + binlog.getPosition().dump(cout); + cout.finalize(); + } } } - catch (...) - { - DB::WriteBufferFromOStream cerr(std::cerr); - cerr << "Unable to parse MySQL binlog event. Code: " << DB::getCurrentExceptionCode() << ", Exception message: " - << DB::getCurrentExceptionMessage(false) << '\n' << ", Previous event: " << '\n'; - last_event->dump(cerr); - cerr << '\n' << ", Event header: " << '\n'; - last_header->dump(cerr); - cerr << '\n'; - return DB::getCurrentExceptionCode(); - } - - DB::WriteBufferFromOStream cout(std::cout); - cout << "Check passed. " << '\n' << "No exception was thrown." << '\n' << "The last binlog event: " << '\n'; - last_event->dump(cout); - cout << '\n'; - return 0; } - int main(int argc, char ** argv) { + (void)signal(SIGINT, signal_handler); boost::program_options::options_description desc("Allowed options"); - desc.add_options()("help,h", "Produce help message"); - desc.add_options()("disable_checksum", "Disable checksums in binlog files."); - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + std::string host = "127.0.0.1"; + int port = 3306; + std::string user = "root"; + std::string password; + std::string gtid; - if (options.count("help") || argc < 2) + desc.add_options() + ("help", "Produce help message") + ("disable_checksum", "Disable checksums in binlog files.") + ("binlog", boost::program_options::value<std::string>(), "Binlog file") + ("host", boost::program_options::value(&host)->default_value(host), "Host to connect") + ("port", boost::program_options::value(&port)->default_value(port), "Port number to connect") + ("user", boost::program_options::value(&user)->default_value(user), "User") + ("password", boost::program_options::value(&password), "Password") + ("gtid", boost::program_options::value(&gtid), "Executed gtid set"); + + try { - std::cout << "Usage: " << argv[0] << " mysql_binlog_file" << std::endl; - std::cout << desc << std::endl; - return 1; + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + boost::program_options::notify(options); + + if (options.count("help") || (!options.count("binlog") && !options.count("gtid"))) + { + std::cout << "Usage: " << argv[0] << std::endl; + std::cout << desc << std::endl; + return EXIT_FAILURE; + } + + if (options.count("binlog")) + processBinlogFromFile(options["binlog"].as<std::string>(), options.count("disable_checksum")); + else + processBinlogFromSocket(host, port, user, password, gtid, options.count("disable_checksum")); + } + catch (std::exception & ex) + { + std::cerr << ex.what() << std::endl; + return EXIT_FAILURE; } - return checkBinLogFile(argv[argc - 1], !options.count("disable_checksum")); + return EXIT_SUCCESS; } diff
--git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index d68330771e5..c4089b21953 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -198,6 +198,7 @@ DELETEs DESC DIEs DOGEFI +Damerau DataGrip DataLens DataTime @@ -344,6 +345,7 @@ Hypot IANA IDE IDEs +IDNA IMDS INFILE INSERTed @@ -413,6 +415,7 @@ JSONType JSONs Jaeger Jannis +Jaro JavaHash Jemalloc Jepsen @@ -701,8 +704,6 @@ PrettySpaceMonoBlock PrettySpaceNoEscapes PrettySpaceNoEscapesMonoBlock Prewhere -TotalPrimaryKeyBytesInMemory -TotalPrimaryKeyBytesInMemoryAllocated PrivateKeyPassphraseHandler ProfileEvents Profiler @@ -714,8 +715,10 @@ Promtail Protobuf ProtobufSingle ProxySQL +Punycode PyArrow PyCharm +QATlib QEMU QTCreator Quantile @@ -725,6 +728,7 @@ QueryCacheHits QueryCacheMisses QueryPreempted QueryThread +QuickAssist QuoteMeta RBAC RClickHouse @@ -912,6 +916,7 @@ ThreadsInOvercommitTracker Timeunit TinyLog Tkachenko +ToASCII ToCenterChild ToChildren ToGeo @@ -920,10 +925,13 @@ ToIPv ToParent ToSnowflake ToString +ToUnicode Toolset TopK TotalBytesOfMergeTreeTables TotalPartsOfMergeTreeTables +TotalPrimaryKeyBytesInMemory +TotalPrimaryKeyBytesInMemoryAllocated TotalRowsOfMergeTreeTables TotalTemporaryFiles Tradeoff @@ -976,6 +984,7 @@ Werror Wether WikiStat WindowView +Winkler WithNames WithNamesAndTypes WordNet @@ -992,6 +1001,7 @@ YYYYMMDDToDate YYYYMMDDhhmmssToDateTime Yandex Yasm +ZSTDQAT Zabbix Zipkin ZooKeeper @@ -1338,6 +1348,7 @@ cutToFirstSignificantSubdomainWithWWW cutURLParameter cutWWW cyrus +damerauLevenshteinDistance datacenter datacenters datafiles @@ -1451,6 +1462,7 @@ endian endianness endsWith endsWithUTF +endswith enqueued enum enum's @@ -1623,6 +1635,8 @@ hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull +hasall +hasany hashtables haversine hdbc @@ -1651,6 +1665,8 @@ hyvor icosahedron icudata idempotency +idnaDecode +idnaEncode ifNotFinite ifNull iframe @@ -1695,6 +1711,8 @@ isZeroOrNull iteratively jaccard jaccardIndex +jaroSimilarity +jaroWinklerSimilarity javaHash javaHashUTF jbod @@ -1790,6 +1808,7 @@ llvm loadDefaultCAFile localhost localread +loess logTrace logagent loghouse @@ -1848,14 +1867,14 @@ metrica metroHash mfedotov minMap +minSampleSizeContinuous +minSampleSizeConversion mindsdb minimalistic mininum miniselect minmap minmax -minSampleSizeContinuous -minSampleSizeConversion mins misconfiguration mispredictions @@ -1885,6 +1904,7 @@ multidirectory multiline multiplyDecimal multipolygon +multisearchany multisets multithread multiword @@ -1937,7 +1957,9 @@ notEquals notILike notIn notLike +notempty notequals +notlike notretry nowInBlock ntile @@ -2075,6 +2097,8 @@ pseudorandom pseudorandomize psql ptrs +punycodeDecode +punycodeEncode pushdown pwrite py @@ -2238,6 +2262,7 @@ seektable sequenceCount sequenceMatch sequenceNextNode +seriesDecomposeSTL seriesPeriodDetectFFT serverTimeZone serverTimezone @@ -2283,6 +2308,8 @@ splitByString splitByWhitespace splitby sqid +sqidDecode +sqidEncode sql sqlalchemy sqlinsert @@ -2293,6 +2320,7 @@ stacktrace stacktraces startsWith startsWithUTF +startswith statbox stateful stddev @@ -2521,6 +2549,8 @@ trimRight trunc tryBase tryDecrypt +tryIdnaEncode +tryPunycodeDecode tskv tsv tui @@ -2665,3 +2695,4 @@ znode znodes zookeeperSessionUptime zstd +srcReplicas diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 88b43afff26..daee2e7fb00 100755 --- 
a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -420,6 +420,9 @@ find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep - # Check for bad punctuation: whitespace before comma. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'" +# Check usage of std::regex which is too bloated and slow. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number 'std::regex' | grep -P '.' && echo "^ Please use re2 instead of std::regex" + # Cyrillic characters hiding inside Latin. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place." diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader deleted file mode 100755 index d53661614c0..00000000000 --- a/utils/s3tools/s3uploader +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import os -import logging -import argparse -import tarfile -import math - -try: - from boto.s3.connection import S3Connection - from boto.s3.key import Key -except ImportError: - raise ImportError("You have to install boto package 'pip install boto'") - - -class S3API(object): - def __init__(self, access_key, secret_access_key, mds_api, mds_url): - self.connection = S3Connection( - host=mds_api, - aws_access_key_id=access_key, - aws_secret_access_key=secret_access_key, - ) - self.mds_url = mds_url - - def upload_file(self, bucket_name, file_path, s3_path): - logging.info("Start uploading file to bucket %s", bucket_name) - - bucket = self.connection.get_bucket(bucket_name) - key = bucket.initiate_multipart_upload(s3_path) - logging.info("Will upload to s3 path %s", s3_path) - chunksize = 1024 * 1024 * 1024 # 1 GB - filesize = os.stat(file_path).st_size - logging.info("File size is %s", filesize) - chunkcount = int(math.ceil(filesize / chunksize)) - - def call_back(x, y): - print("Uploaded {}/{} bytes".format(x, y)) - - try: - for i in range(chunkcount + 1): - logging.info("Uploading chunk %s of %s", i, chunkcount + 1) - offset = chunksize * i - bytes_size = min(chunksize, filesize - offset) - - with open(file_path, "r") as fp: - fp.seek(offset) - key.upload_part_from_file( - fp=fp, part_num=i + 1, size=bytes_size, cb=call_back, num_cb=100 - ) - key.complete_upload() - except Exception as ex: - key.cancel_upload() - raise ex - logging.info("Contents were set") - return "https://{bucket}.{mds_url}/{path}".format( - bucket=bucket_name, mds_url=self.mds_url, path=s3_path - ) - - def set_file_contents(self, bucket, local_file_path, s3_file_path): - key = Key(bucket) - key.key = s3_file_path - file_size = os.stat(local_file_path).st_size - logging.info( - "Uploading file `%s` to `%s`. 
Size is %s", - local_file_path, - s3_file_path, - file_size, - ) - - def call_back(x, y): - print("Uploaded {}/{} bytes".format(x, y)) - - key.set_contents_from_filename(local_file_path, cb=call_back) - - def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path): - bucket = self.connection.get_bucket(bucket_name) - if s3_path.endswith("/"): - s3_path += "store/" - else: - s3_path += "/store/" - print(s3_path) - for root, dirs, files in os.walk(directory_path): - path = root.split(os.sep) - for file in files: - local_file_path = os.path.join(root, file) - s3_file = local_file_path[len(directory_path) + 1 :] - s3_file_path = os.path.join(s3_path, s3_file) - self.set_file_contents(bucket, local_file_path, s3_file_path) - - logging.info("Uploading finished") - return "https://{bucket}.{mds_url}/{path}".format( - bucket=bucket_name, mds_url=self.mds_url, path=s3_path - ) - - def list_bucket_keys(self, bucket_name): - bucket = self.connection.get_bucket(bucket_name) - for obj in bucket.get_all_keys(): - print(obj.key) - - def remove_folder_from_bucket(self, bucket_name, folder_path): - bucket = self.connection.get_bucket(bucket_name) - bucket.get_all_keys() - for obj in bucket.get_all_keys(): - if obj.key.startswith(folder_path): - print("Removing " + obj.key) - obj.delete() - - -def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix): - relative_data_path = os.path.join("data", db_name, table_name) - relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql") - path_to_data = os.path.join(clickhouse_data_path, relative_data_path) - path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path) - temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name) - with tarfile.open(temporary_file_name, "w") as bundle: - bundle.add(path_to_data, arcname=relative_data_path) - bundle.add(path_to_metadata, arcname=relative_meta_path) - return temporary_file_name - - -USAGE_EXAMPLES = """ -examples: -\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket -\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/ -""" - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - - parser = argparse.ArgumentParser( - description="Simple tool for uploading datasets to clickhouse S3", - usage="%(prog)s [options] {}".format(USAGE_EXAMPLES), - ) - parser.add_argument("--s3-api-url", default="s3.amazonaws.com") - parser.add_argument("--s3-common-url", default="s3.amazonaws.com") - parser.add_argument("--bucket-name", default="clickhouse-datasets") - parser.add_argument( - "--dataset-name", - required=True, - help="Name of dataset, will be used in uploaded path", - ) - parser.add_argument("--access-key-id", required=True) - parser.add_argument("--secret-access-key", required=True) - parser.add_argument( - "--clickhouse-data-path", - default="/var/lib/clickhouse/", - help="Path to clickhouse database on filesystem", - ) - parser.add_argument("--s3-path", help="Path in s3, where to upload file") - parser.add_argument( - "--tmp-prefix", default="/tmp", help="Prefix to store temporary downloaded file" - ) - data_group = parser.add_mutually_exclusive_group(required=True) - table_name_argument = data_group.add_argument( - "--table-name", - help="Name of table with 
database, if you are uploading partitions", - ) - data_group.add_argument("--file-path", help="Name of file, if you are uploading") - data_group.add_argument( - "--directory-path", help="Path to directory with files to upload" - ) - data_group.add_argument( - "--list-directory", help="List s3 directory by --directory-path" - ) - data_group.add_argument( - "--remove-directory", help="Remove s3 directory by --directory-path" - ) - args = parser.parse_args() - - if args.table_name is not None and args.clickhouse_data_path is None: - raise argparse.ArgumentError( - table_name_argument, - "You should specify --clickhouse-data-path to upload --table", - ) - - s3_conn = S3API( - args.access_key_id, args.secret_access_key, args.s3_api_url, args.s3_common_url - ) - - file_path = "" - directory_path = args.directory_path - s3_path = args.s3_path - - if args.list_directory: - s3_conn.list_bucket_keys(args.bucket_name) - elif args.remove_directory: - print("Removing s3 path: " + args.remove_directory) - s3_conn.remove_folder_from_bucket(args.bucket_name, args.remove_directory) - elif args.directory_path is not None: - url = s3_conn.upload_data_for_static_files_disk( - args.bucket_name, directory_path, s3_path - ) - logging.info("Data uploaded: %s", url) - else: - if args.table_name is not None: - if "." not in args.table_name: - db_name = "default" - else: - db_name, table_name = args.table_name.split(".") - file_path = make_tar_file_for_table( - args.clickhouse_data_path, db_name, table_name, args.tmp_prefix - ) - else: - file_path = args.file_path - - if "tsv" in file_path: - s3_path = os.path.join( - args.dataset_name, "tsv", os.path.basename(file_path) - ) - if args.table_name is not None: - s3_path = os.path.join( - args.dataset_name, "partitions", os.path.basename(file_path) - ) - elif args.s3_path is not None: - s3_path = os.path.join( - args.dataset_name, args.s3_path, os.path.basename(file_path) - ) - else: - raise Exception("Don't know s3-path to upload") - - url = s3_conn.upload_file(args.bucket_name, file_path, s3_path) - logging.info("Data uploaded: %s", url)