diff --git a/.gitmodules b/.gitmodules index 53ef899dd99..68016bf8c5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -245,6 +245,12 @@ [submodule "contrib/idxd-config"] path = contrib/idxd-config url = https://github.com/intel/idxd-config +[submodule "contrib/QAT-ZSTD-Plugin"] + path = contrib/QAT-ZSTD-Plugin + url = https://github.com/intel/QAT-ZSTD-Plugin +[submodule "contrib/qatlib"] + path = contrib/qatlib + url = https://github.com/intel/qatlib [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash @@ -360,3 +366,6 @@ [submodule "contrib/sqids-cpp"] path = contrib/sqids-cpp url = https://github.com/sqids/sqids-cpp.git +[submodule "contrib/idna"] + path = contrib/idna + url = https://github.com/ada-url/idna.git diff --git a/README.md b/README.md index c56b3c2fd0d..d356e429892 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ curl https://clickhouse.com/ | sh ## Upcoming Events -Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. +Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1b5ba15187f..c6d1dcb41e6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx) add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) +add_contrib (idna-cmake idna) add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (incbin-cmake incbin) add_contrib (sqids-cpp-cmake sqids-cpp) @@ -171,9 +172,9 @@ add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42) - option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES}) + option (ENABLE_QPL "Enable Intel® Query Processing Library (QPL)" ${ENABLE_LIBRARIES}) elseif(ENABLE_QPL) - message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with SSE 4.2 or higher") + message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 with SSE 4.2 or higher") endif() if (ENABLE_QPL) add_contrib (idxd-config-cmake idxd-config) @@ -182,6 +183,28 @@ else() message(STATUS "Not using QPL") endif () +if (OS_LINUX AND ARCH_AMD64) + option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES}) +elseif(ENABLE_QATLIB) + message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64") +endif() +if (ENABLE_QATLIB) + option (ENABLE_QAT_USDM_DRIVER "A User Space DMA-able Memory (USDM) component which allocates/frees DMA-able memory" OFF) + option (ENABLE_QAT_OUT_OF_TREE_BUILD "Using out-of-tree driver, user needs to customize ICP_ROOT variable" OFF) + set(ICP_ROOT "" CACHE STRING "ICP_ROOT variable to define the path of out-of-tree driver package") + if (ENABLE_QAT_OUT_OF_TREE_BUILD) + if (ICP_ROOT STREQUAL "") + message(FATAL_ERROR "Please define the path of out-of-tree driver package 
with -DICP_ROOT=xxx or disable out-of-tree build with -DENABLE_QAT_OUT_OF_TREE_BUILD=OFF; \
+ If you want an out-of-tree build but have no package available, please download and build the ICP package from: https://www.intel.com/content/www/us/en/download/765501.html")
+ endif ()
+ else()
+ add_contrib (qatlib-cmake qatlib) # requires: isa-l
+ endif ()
+ add_contrib (QAT-ZSTD-Plugin-cmake QAT-ZSTD-Plugin)
+else()
+ message(STATUS "Not using QATLib")
+endif ()
+
 add_contrib (morton-nd-cmake morton-nd)
 if (ARCH_S390X)
 add_contrib(crc32-s390x-cmake crc32-s390x)
diff --git a/contrib/NuRaft b/contrib/NuRaft
index 2f5f52c4d8c..1278e32bb0d 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 2f5f52c4d8c87c2a3a3d101ca3a0194c9b77526f
+Subproject commit 1278e32bb0d5dc489f947e002bdf8c71b0ddaa63
diff --git a/contrib/QAT-ZSTD-Plugin b/contrib/QAT-ZSTD-Plugin
new file mode 160000
index 00000000000..e5a134e12d2
--- /dev/null
+++ b/contrib/QAT-ZSTD-Plugin
@@ -0,0 +1 @@
+Subproject commit e5a134e12d2ea8a5b0f3b83c5b1c325fda4eb0a8
diff --git a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..72d21a8572b
--- /dev/null
+++ b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt
@@ -0,0 +1,85 @@
+# Intel® QuickAssist Technology ZSTD Plugin (QAT ZSTD Plugin) is a plugin to Zstandard* (ZSTD*) that accelerates compression with QAT.
+# ENABLE_QAT_OUT_OF_TREE_BUILD = 1 means the kernel has no native QAT support; the user builds and installs the driver from the external package: https://www.intel.com/content/www/us/en/download/765501.html
+# In that case the user also needs to set the ICP_ROOT variable so that it points to the root directory of the QAT driver source tree.
+# ENABLE_QAT_OUT_OF_TREE_BUILD = 0 means the kernel has a built-in QAT driver; QAT-ZSTD-PLUGIN then only depends on qatlib.
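+#
+# Illustrative configure invocations (a sketch only; the gating options are defined in contrib/CMakeLists.txt and the driver path below is a placeholder, not a required location):
+#   in-tree build against the bundled qatlib:      cmake -DENABLE_QATLIB=ON ...
+#   out-of-tree build against an external driver:  cmake -DENABLE_QATLIB=ON -DENABLE_QAT_OUT_OF_TREE_BUILD=ON -DICP_ROOT=/path/to/QAT-driver ...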
+ +if (ENABLE_QAT_OUT_OF_TREE_BUILD) + message(STATUS "Intel QATZSTD out-of-tree build, ICP_ROOT:${ICP_ROOT}") + + set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") + set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") + set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") + set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") + set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") + set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") + set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") + set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") + set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") + if (ENABLE_QAT_USDM_DRIVER) + add_definitions(-DENABLE_USDM_DRV) + endif() + add_library(_qatzstd_plugin ${QATZSTD_SRC}) + target_link_libraries (_qatzstd_plugin PUBLIC ${USDM_LIBRARY} ${QAT_S_LIBRARY}) + target_include_directories(_qatzstd_plugin + SYSTEM PUBLIC "${QATZSTD_SRC_DIR}" + PRIVATE ${QAT_INCLUDE_DIR} + ${QAT_DC_INCLUDE_DIR} + ${QAT_AL_INCLUDE_DIR} + ${QAT_USDM_INCLUDE_DIR} + ${ZSTD_LIBRARY_DIR}) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC) + add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) +else () # In-tree build + message(STATUS "Intel QATZSTD in-tree build") + set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") + set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") + set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") + + # please download&build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html + set(ICP_ROOT "${ClickHouse_SOURCE_DIR}/contrib/qatlib") + set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") + set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") + set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") + set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") + set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") + set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") + set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib") + set(LIBQAT_HEADER_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") + + file(MAKE_DIRECTORY + "${LIBQAT_HEADER_DIR}/qat" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/cpa.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/dc/cpa_dc.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_poll.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_user.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + file(COPY "${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv/qae_mem.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" + ) + + if (ENABLE_QAT_USDM_DRIVER) + add_definitions(-DENABLE_USDM_DRV) + endif() + + add_library(_qatzstd_plugin ${QATZSTD_SRC}) + target_link_libraries (_qatzstd_plugin PUBLIC ch_contrib::qatlib ch_contrib::usdm) + target_include_directories(_qatzstd_plugin PRIVATE + ${QAT_INCLUDE_DIR} + ${QAT_DC_INCLUDE_DIR} + ${QAT_AL_INCLUDE_DIR} + ${QAT_USDM_INCLUDE_DIR} + ${ZSTD_LIBRARY_DIR} + ${LIBQAT_HEADER_DIR}) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE) + target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) + add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) +endif 
() + diff --git a/contrib/azure b/contrib/azure index 060c54dfb0a..e71395e44f3 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3 +Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2 diff --git a/contrib/idna b/contrib/idna new file mode 160000 index 00000000000..3c8be01d42b --- /dev/null +++ b/contrib/idna @@ -0,0 +1 @@ +Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667 diff --git a/contrib/idna-cmake/CMakeLists.txt b/contrib/idna-cmake/CMakeLists.txt new file mode 100644 index 00000000000..1138b836192 --- /dev/null +++ b/contrib/idna-cmake/CMakeLists.txt @@ -0,0 +1,24 @@ +option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES}) +if ((NOT ENABLE_IDNA)) + message (STATUS "Not using idna") + return() +endif() +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna") + +set (SRCS + "${LIBRARY_DIR}/src/idna.cpp" + "${LIBRARY_DIR}/src/mapping.cpp" + "${LIBRARY_DIR}/src/mapping_tables.cpp" + "${LIBRARY_DIR}/src/normalization.cpp" + "${LIBRARY_DIR}/src/normalization_tables.cpp" + "${LIBRARY_DIR}/src/punycode.cpp" + "${LIBRARY_DIR}/src/to_ascii.cpp" + "${LIBRARY_DIR}/src/to_unicode.cpp" + "${LIBRARY_DIR}/src/unicode_transcoding.cpp" + "${LIBRARY_DIR}/src/validity.cpp" +) + +add_library (_idna ${SRCS}) +target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include") + +add_library (ch_contrib::idna ALIAS _idna) diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index c77d5d8319e..60c9d6c4d90 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -33,7 +33,6 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/optional.cpp" "${LIBCXX_SOURCE_DIR}/src/random.cpp" "${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp" -"${LIBCXX_SOURCE_DIR}/src/regex.cpp" "${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp" "${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp" "${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp" diff --git a/contrib/llvm-project b/contrib/llvm-project index 1834e42289c..2568a7cd129 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec +Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856 diff --git a/contrib/qatlib b/contrib/qatlib new file mode 160000 index 00000000000..abe15d7bfc0 --- /dev/null +++ b/contrib/qatlib @@ -0,0 +1 @@ +Subproject commit abe15d7bfc083117bfbb4baee0b49ffcd1c03c5c diff --git a/contrib/qatlib-cmake/CMakeLists.txt b/contrib/qatlib-cmake/CMakeLists.txt new file mode 100644 index 00000000000..d599775035a --- /dev/null +++ b/contrib/qatlib-cmake/CMakeLists.txt @@ -0,0 +1,213 @@ +# Intel® QuickAssist Technology Library (QATlib). + +message(STATUS "Intel QATlib ON") +set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib") +set(LIBQAT_DIR "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src") +set(LIBOSAL_DIR "${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src") +set(OPENSSL_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl") + +# Build 3 libraries: _qatmgr, _osal, _qatlib +# Produce ch_contrib::qatlib by linking these libraries. 
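+#
+# The resulting link structure is roughly:
+#   ch_contrib::qatlib (alias of _qatlib) links _qatmgr, _osal, OpenSSL::SSL and ch_contrib::isal
+#   ch_contrib::usdm (alias of _usdm) provides the user-space DMA-able memory helpers
+# Both aliases are consumed by the QAT-ZSTD-Plugin build (see QAT-ZSTD-Plugin-cmake).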
+ +# _qatmgr + +SET(LIBQATMGR_sources ${LIBQAT_DIR}/qat_direct/vfio/qat_mgr_client.c + ${LIBQAT_DIR}/qat_direct/vfio/qat_mgr_lib.c + ${LIBQAT_DIR}/qat_direct/vfio/qat_log.c + ${LIBQAT_DIR}/qat_direct/vfio/vfio_lib.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_pfvf_proto.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_pfvf_vf_msg.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_pf.c) + +add_library(_qatmgr ${LIBQATMGR_sources}) + +target_include_directories(_qatmgr PRIVATE + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/vfio + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include + ${LIBQAT_ROOT_DIR}/quickassist/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include + ${LIBQAT_ROOT_DIR}/quickassist/qat/drivers/crypto/qat/qat_common + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/common/include + ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include) + +target_compile_definitions(_qatmgr PRIVATE -DUSER_SPACE) +target_compile_options(_qatmgr PRIVATE -Wno-error=int-conversion) + +# _osal + +SET(LIBOSAL_sources + ${LIBOSAL_DIR}/linux/user_space/OsalSemaphore.c + ${LIBOSAL_DIR}/linux/user_space/OsalThread.c + ${LIBOSAL_DIR}/linux/user_space/OsalMutex.c + ${LIBOSAL_DIR}/linux/user_space/OsalSpinLock.c + ${LIBOSAL_DIR}/linux/user_space/OsalAtomic.c + ${LIBOSAL_DIR}/linux/user_space/OsalServices.c + ${LIBOSAL_DIR}/linux/user_space/OsalUsrKrnProxy.c + ${LIBOSAL_DIR}/linux/user_space/OsalCryptoInterface.c) + +add_library(_osal ${LIBOSAL_sources}) + +target_include_directories(_osal PRIVATE + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include + ${OPENSSL_DIR}/include + ${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_x86_64/include + ${ClickHouse_SOURCE_DIR}/contrib/qatlib-cmake/include) + +target_compile_definitions(_osal PRIVATE -DOSAL_ENSURE_ON -DUSE_OPENSSL) + +# _qatlib +SET(LIBQAT_sources + ${LIBQAT_DIR}/common/compression/dc_buffers.c + ${LIBQAT_DIR}/common/compression/dc_chain.c + ${LIBQAT_DIR}/common/compression/dc_datapath.c + ${LIBQAT_DIR}/common/compression/dc_dp.c + ${LIBQAT_DIR}/common/compression/dc_header_footer.c + ${LIBQAT_DIR}/common/compression/dc_header_footer_lz4.c + ${LIBQAT_DIR}/common/compression/dc_session.c + ${LIBQAT_DIR}/common/compression/dc_stats.c + ${LIBQAT_DIR}/common/compression/dc_err_sim.c + ${LIBQAT_DIR}/common/compression/dc_ns_datapath.c + ${LIBQAT_DIR}/common/compression/dc_ns_header_footer.c + ${LIBQAT_DIR}/common/compression/dc_crc32.c + ${LIBQAT_DIR}/common/compression/dc_crc64.c + ${LIBQAT_DIR}/common/compression/dc_xxhash32.c + ${LIBQAT_DIR}/common/compression/icp_sal_dc_err_sim.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_control_path.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_data_path.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_stats.c + ${LIBQAT_DIR}/common/crypto/asym/dsa/lac_dsa.c + ${LIBQAT_DIR}/common/crypto/asym/dsa/lac_dsa_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_common.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_montedwds.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_nist_curves.c + 
${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecdh.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecdsa.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecsm2.c + ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_kpt_ecdsa.c + ${LIBQAT_DIR}/common/crypto/asym/large_number/lac_ln.c + ${LIBQAT_DIR}/common/crypto/asym/large_number/lac_ln_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_mmp.c + ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_qat_comms.c + ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_utils.c + ${LIBQAT_DIR}/common/crypto/asym/prime/lac_prime.c + ${LIBQAT_DIR}/common/crypto/asym/prime/lac_prime_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_control_path.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_decrypt.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_encrypt.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_interface_check.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_keygen.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_stats.c + ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_kpt_rsa_decrypt.c + ${LIBQAT_DIR}/common/crypto/sym/drbg/lac_sym_drbg_api.c + ${LIBQAT_DIR}/common/crypto/sym/key/lac_sym_key.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_alg_chain.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_api.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_auth_enc.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_cb.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_cipher.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_compile_check.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_dp.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_hash.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_partial.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_queue.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_stats.c + ${LIBQAT_DIR}/common/crypto/sym/nrbg/lac_sym_nrbg_api.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_cipher.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_constants_table.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_hash.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_hash_defs_lookup.c + ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_key.c + ${LIBQAT_DIR}/common/crypto/sym/lac_sym_hash_sw_precomputes.c + ${LIBQAT_DIR}/common/crypto/kpt/provision/lac_kpt_provision.c + ${LIBQAT_DIR}/common/ctrl/sal_compression.c + ${LIBQAT_DIR}/common/ctrl/sal_create_services.c + ${LIBQAT_DIR}/common/ctrl/sal_ctrl_services.c + ${LIBQAT_DIR}/common/ctrl/sal_list.c + ${LIBQAT_DIR}/common/ctrl/sal_crypto.c + ${LIBQAT_DIR}/common/ctrl/sal_dc_chain.c + ${LIBQAT_DIR}/common/ctrl/sal_instances.c + ${LIBQAT_DIR}/common/qat_comms/sal_qat_cmn_msg.c + ${LIBQAT_DIR}/common/utils/lac_buffer_desc.c + ${LIBQAT_DIR}/common/utils/lac_log_message.c + ${LIBQAT_DIR}/common/utils/lac_mem.c + ${LIBQAT_DIR}/common/utils/lac_mem_pools.c + ${LIBQAT_DIR}/common/utils/lac_sw_responses.c + ${LIBQAT_DIR}/common/utils/lac_sync.c + ${LIBQAT_DIR}/common/utils/sal_service_state.c + ${LIBQAT_DIR}/common/utils/sal_statistics.c + ${LIBQAT_DIR}/common/utils/sal_misc_error_stats.c + ${LIBQAT_DIR}/common/utils/sal_string_parse.c + ${LIBQAT_DIR}/common/utils/sal_user_process.c + ${LIBQAT_DIR}/common/utils/sal_versions.c + ${LIBQAT_DIR}/common/device/sal_dev_info.c + ${LIBQAT_DIR}/user/sal_user.c + ${LIBQAT_DIR}/user/sal_user_dyn_instance.c + ${LIBQAT_DIR}/qat_direct/common/adf_process_proxy.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_cfg.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_device.c + 
${LIBQAT_DIR}/qat_direct/common/adf_user_dyn.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_ETring_mgr_dp.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_init.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_ring.c + ${LIBQAT_DIR}/qat_direct/common/adf_user_transport_ctrl.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_cfg.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_ring.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_user_bundles.c + ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_user_proxy.c + ${LIBQAT_DIR}/common/compression/dc_crc_base.c) + +add_library(_qatlib ${LIBQAT_sources}) + +target_include_directories(_qatlib PRIVATE + ${CMAKE_SYSROOT}/usr/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include + ${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include + ${LIBOSAL_DIR}/linux/user_space/include + ${LIBQAT_ROOT_DIR}/quickassist/include + ${LIBQAT_ROOT_DIR}/quickassist/include/lac + ${LIBQAT_ROOT_DIR}/quickassist/include/dc + ${LIBQAT_ROOT_DIR}/quickassist/qat/drivers/crypto/qat/qat_common + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/compression/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/crypto/sym/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/crypto/asym/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/firmware/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/common/include + ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/vfio + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space + ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include + ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include) + +target_link_libraries(_qatlib PRIVATE _qatmgr _osal OpenSSL::SSL ch_contrib::isal) +target_compile_definitions(_qatlib PRIVATE -DUSER_SPACE -DLAC_BYTE_ORDER=__LITTLE_ENDIAN -DOSAL_ENSURE_ON) +target_link_options(_qatlib PRIVATE -pie -z relro -z now -z noexecstack) +target_compile_options(_qatlib PRIVATE -march=native) +add_library (ch_contrib::qatlib ALIAS _qatlib) + +# _usdm + +set(LIBUSDM_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib/quickassist/utilities/libusdm_drv") +set(LIBUSDM_sources + ${LIBUSDM_DIR}/user_space/vfio/qae_mem_utils_vfio.c + ${LIBUSDM_DIR}/user_space/qae_mem_utils_common.c + ${LIBUSDM_DIR}/user_space/vfio/qae_mem_hugepage_utils_vfio.c) + +add_library(_usdm ${LIBUSDM_sources}) + +target_include_directories(_usdm PRIVATE + ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include + ${LIBUSDM_DIR} + ${LIBUSDM_DIR}/include + ${LIBUSDM_DIR}/user_space) + +add_library (ch_contrib::usdm ALIAS _usdm) diff --git a/contrib/qatlib-cmake/include/mqueue.h b/contrib/qatlib-cmake/include/mqueue.h new file mode 100644 index 00000000000..7b1125074a8 --- /dev/null +++ b/contrib/qatlib-cmake/include/mqueue.h @@ -0,0 +1,14 @@ +/* This is a workaround for a build conflict issue +1. __GLIBC_PREREQ (referenced in OsalServices.c) is only defined in './sysroot/linux-x86_64/include/features.h' +2. mqueue.h only exist under './sysroot/linux-x86_64-musl/' +This cause target_include_directories for _osal has a conflict between './sysroot/linux-x86_64/include' and './sysroot/linux-x86_64-musl/' +hence create mqueue.h separately under ./qatlib-cmake/include as an alternative. 
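+
+For example, with the values defined below, __GLIBC_PREREQ(2, 25) expands to
+((2 << 16) + 27 >= (2 << 16) + 25), which is true, while __GLIBC_PREREQ(2, 28)
+evaluates to false.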
+*/ + +/* Major and minor version number of the GNU C library package. Use + these macros to test for features in specific releases. */ +#define __GLIBC__ 2 +#define __GLIBC_MINOR__ 27 + +#define __GLIBC_PREREQ(maj, min) \ + ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min)) diff --git a/contrib/rocksdb b/contrib/rocksdb index 66e3cbec314..dead55e60b8 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 66e3cbec31400ed3a23deb878c5d7f56f990f0ae +Subproject commit dead55e60b873d5f70f0e9458fbbba2b2180f430 diff --git a/contrib/sqids-cpp b/contrib/sqids-cpp index 3756e537d4d..a471f53672e 160000 --- a/contrib/sqids-cpp +++ b/contrib/sqids-cpp @@ -1 +1 @@ -Subproject commit 3756e537d4d48cc0dd4176801fe19f99601439b0 +Subproject commit a471f53672e98d49223f598528a533b07b085c61 diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index d94ffb893e1..b9c7ea34a36 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -41,6 +41,10 @@ readarray -t DISKS_PATHS < <(clickhouse extract-from-config --config-file "$CLIC readarray -t DISKS_METADATA_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.metadata_path' || true) CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" +CLICKHOUSE_PASSWORD_FILE="${CLICKHOUSE_PASSWORD_FILE:-}" +if [[ -n "${CLICKHOUSE_PASSWORD_FILE}" && -f "${CLICKHOUSE_PASSWORD_FILE}" ]]; then + CLICKHOUSE_PASSWORD="$(cat "${CLICKHOUSE_PASSWORD_FILE}")" +fi CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index c9ce5697182..9079246429f 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -44,6 +44,9 @@ if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TR # It is not needed, we will explicitly create tables on s3. # We do not have statefull tests with s3 storage run in public repository, but this is needed for another repository. 
rm /etc/clickhouse-server/config.d/s3_storage_policy_for_merge_tree_by_default.xml + + rm /etc/clickhouse-server/config.d/storage_metadata_with_full_object_key.xml + rm /etc/clickhouse-server/config.d/s3_storage_policy_with_template_object_key.xml fi function start() diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 67056cc1bc1..ad236df0af4 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -193,6 +193,7 @@ stop # Let's enable S3 storage by default export USE_S3_STORAGE_FOR_MERGE_TREE=1 +export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 configure diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 8b7f86cce5c..8ebab80423f 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -11,7 +11,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec ``` sql CREATE TABLE s3_queue_engine_table (name String, value UInt32) - ENGINE = S3Queue(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression]) + ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression]) [SETTINGS] [mode = 'unordered',] [after_processing = 'keep',] diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index ed413959ca6..6d60611ae4b 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -504,24 +504,25 @@ Indexes of type `set` can be utilized by all functions. The other index types ar | Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | inverted | |------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|----------| -| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | -| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](/docs/en/sql-reference/functions/string-search-functions.md/#like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | ✗ | | [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | | [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | -| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ | -| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ 
| ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | -| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ | -| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | +| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ | +| [in](/docs/en/sql-reference/functions/in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](/docs/en/sql-reference/functions/in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [empty](/docs/en/sql-reference/functions/array-functions/#empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [notEmpty](/docs/en/sql-reference/functions/array-functions/#notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [has](/docs/en/sql-reference/functions/array-functions/#has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | +| [hasAny](/docs/en/sql-reference/functions/array-functions/#hasany) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ | +| [hasAll](/docs/en/sql-reference/functions/array-functions/#hasall) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | | hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | | hasTokenCaseInsensitive (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ | @@ -1143,6 +1144,8 @@ Optional parameters: - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. - `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). - `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. 
The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`. +- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here. ### Configuring the cache diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 50c5ff4457f..fbff622ae38 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -29,10 +29,6 @@ Transactionally inconsistent caching is traditionally provided by client tools o the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance effort and avoids redundancy. -:::note -Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results. -::: - ## Configuration Settings and Usage Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 86d24c3a942..9d732c38888 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -1,5 +1,5 @@ --- -sidebar_label: Settings Overview +title: "Settings Overview" sidebar_position: 1 slug: /en/operations/settings/ pagination_next: en/operations/settings/settings @@ -16,11 +16,34 @@ There are two main groups of ClickHouse settings: - Global server settings - Query-level settings -The main distinction between global server settings and query-level settings is that -global server settings must be set in configuration files while query-level settings -can be set in configuration files or with SQL queries. +The main distinction between global server settings and query-level settings is that global server settings must be set in configuration files, while query-level settings can be set in configuration files or with SQL queries. Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level. -Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query-level. 
+Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query level.
+## See non-default settings
+
+To view which settings have been changed from their default value:
+
+```sql
+SELECT name, value FROM system.settings WHERE changed
+```
+
+If you haven't changed any settings from their default value, then ClickHouse will return nothing.
+
+To check the value of a particular setting, specify the `name` of the setting in your query:
+
+```sql
+SELECT name, value FROM system.settings WHERE name = 'max_threads'
+```
+
+This command should return something like:
+
+```response
+┌─name────────┬─value─────┐
+│ max_threads │ 'auto(8)' │
+└─────────────┴───────────┘
+
+1 row in set. Elapsed: 0.002 sec.
+```
diff --git a/docs/en/operations/settings/mysql-binlog-client.md b/docs/en/operations/settings/mysql-binlog-client.md
new file mode 100644
index 00000000000..1e1a2449e1c
--- /dev/null
+++ b/docs/en/operations/settings/mysql-binlog-client.md
@@ -0,0 +1,176 @@
+# The MySQL Binlog Client
+
+The MySQL Binlog Client provides a mechanism in ClickHouse to share the binlog from a MySQL instance among multiple [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) databases. This avoids consuming unnecessary bandwidth and CPU when replicating more than one schema/database.
+
+The implementation is resilient against crashes and disk issues. The executed GTID sets of the binlog itself and of the consuming databases are persisted only after the data they describe has been safely persisted as well. The implementation also tolerates re-doing aborted operations (at-least-once delivery).
+
+# Settings
+
+## use_binlog_client
+
+Forces reuse of an existing MySQL binlog connection, or creates a new one if it does not exist. The connection is defined by `user:pass@host:port`.
+
+Default value: 0
+
+**Example**
+
+```sql
+-- create MaterializedMySQL databases that read the events from the binlog client
+CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1
+CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
+CREATE DATABASE db3 ENGINE = MaterializedMySQL('host:port', 'db3', 'user2', 'password2') SETTINGS use_binlog_client=1
+```
+
+Databases `db1` and `db2` will use the same binlog connection, since they use the same `user:pass@host:port`. Database `db3` will use a separate binlog connection.
+
+## max_bytes_in_binlog_queue
+
+Defines the limit on the number of bytes in the binlog event queue. If the number of bytes in the queue exceeds this limit, reading new events from MySQL stops until space for new events has been freed. This introduces a memory limit. A very high value could consume all available memory; a very low value could make the databases wait for new events.
+
+Default value: 67108864
+
+**Example**
+
+```sql
+CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=33554432
+CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
+```
+
+If database `db1` is unable to consume binlog events fast enough and the size of the event queue exceeds `33554432` bytes, reading of new events from MySQL is postponed until `db1` consumes the events and releases some space.
+ +NOTE: This will impact to `db2`, and it will be waiting for new events too, since they share the same connection. + +## max_milliseconds_to_wait_in_binlog_queue + +Defines the max milliseconds to wait when `max_bytes_in_binlog_queue` exceeded. After that it will detach the database from current binlog connection and will retry establish new one to prevent other databases to wait for this database. + +Default value: 10000 + +**Example** + +```sql +CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=33554432, max_milliseconds_to_wait_in_binlog_queue=1000 +CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1 +``` + +If the event queue of database `db1` is full, the binlog connection will be waiting in `1000`ms and if the database is not able to consume the events, it will be detached from the connection to create another one. + +NOTE: If the database `db1` has been detached from the shared connection and created new one, after the binlog connections for `db1` and `db2` have the same positions they will be merged to one. And `db1` and `db2` will use the same connection again. + +## max_bytes_in_binlog_dispatcher_buffer + +Defines the max bytes in the binlog dispatcher's buffer before it is flushed to attached binlog. The events from MySQL binlog connection are buffered before sending to attached databases. It increases the events throughput from the binlog to databases. + +Default value: 1048576 + +## max_flush_milliseconds_in_binlog_dispatcher + +Defines the max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlog. If there are no events received from MySQL binlog connection for a while, after some time buffered events should be sent to the attached databases. + +Default value: 1000 + +# Design + +## The Binlog Events Dispatcher + +Currently each MaterializedMySQL database opens its own connection to MySQL to subscribe to binlog events. There is a need to have only one connection and _dispatch_ the binlog events to all databases that replicate from the same MySQL instance. + +## Each MaterializedMySQL Database Has Its Own Event Queue + +To prevent slowing down other instances there should be an _event queue_ per MaterializedMySQL database to handle the events independently of the speed of other instances. The dispatcher reads an event from the binlog, and sends it to every MaterializedMySQL database that needs it. Each database handles its events in separate threads. + +## Catching up + +If several databases have the same binlog position, they can use the same dispatcher. If a newly created database (or one that has been detached for some time) requests events that have been already processed, we need to create another communication _channel_ to the binlog. We do this by creating another temporary dispatcher for such databases. When the new dispatcher _catches up with_ the old one, the new/temporary dispatcher is not needed anymore and all databases getting events from this dispatcher can be moved to the old one. + +## Memory Limit + +There is a _memory limit_ to control event queue memory consumption per MySQL Client. If a database is not able to handle events fast enough, and the event queue is getting full, we have the following options: + +1. The dispatcher is blocked until the slowest database frees up space for new events. All other databases are waiting for the slowest one. (Preferred) +2. 
+2. The dispatcher is _never_ blocked, but suspends incremental sync for the slow database and continues dispatching events to the remaining databases.
+
+## Performance
+
+A lot of CPU can be saved by not processing every event in every database. The binlog contains events for all databases; it is wasteful to distribute row events to a database that will not process them, especially if there are a lot of databases. This requires some sort of per-database binlog filtering and buffering.
+
+Currently, all events are sent to all MaterializedMySQL databases, but parsing the event, which consumes CPU, is left to each database.
+
+# Detailed Design
+
+1. If a client (e.g. database) wants to read a stream of events from the MySQL binlog, it creates a connection to the remote binlog by host/user/password and _executed GTID set_ params.
+2. If another client wants to read events from the binlog but for a different _executed GTID set_, it is **not** possible to reuse the existing connection to MySQL, so another connection to the same remote binlog needs to be created. (_This is how it is implemented today_).
+3. When these 2 connections reach the same binlog position, they read the same events. It is then logical to drop the duplicate connection and move all of its users out, so that one connection dispatches binlog events to several clients. Obviously, only connections to the same binlog should be merged.
+
+## Classes
+
+1. One connection can send (or dispatch) events to several clients and might be called `BinlogEventsDispatcher`.
+2. Several dispatchers are grouped by _user:password@host:port_ in a `BinlogClient`, since they point to the same binlog.
+3. The clients should communicate only with the public API of `BinlogClient`. The result of using `BinlogClient` is an object that implements `IBinlog` to read events from. This implementation of `IBinlog` must be compatible with the old implementation `MySQLFlavor` -> when replacing the old implementation with the new one, the behavior must not change.
+
+## SQL
+
+```sql
+-- create MaterializedMySQL databases that read the events from the binlog client
+CREATE DATABASE db1_client1 ENGINE = MaterializedMySQL('host:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=1024;
+CREATE DATABASE db2_client1 ENGINE = MaterializedMySQL('host:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1;
+CREATE DATABASE db3_client1 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1;
+CREATE DATABASE db4_client2 ENGINE = MaterializedMySQL('host2:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1;
+CREATE DATABASE db5_client3 ENGINE = MaterializedMySQL('host:port', 'db', 'user1', 'password') SETTINGS use_binlog_client=1;
+CREATE DATABASE db6_old ENGINE = MaterializedMySQL('host:port', 'db', 'user1', 'password') SETTINGS use_binlog_client=0;
+```
+
+Databases `db1_client1`, `db2_client1` and `db3_client1` share one instance of `BinlogClient` since they have the same params. `BinlogClient` will create 3 connections to the MySQL server and thus 3 instances of `BinlogEventsDispatcher`, but if these connections reach the same binlog position, they should be merged into one connection. This means all clients will be moved to one dispatcher and the others will be closed. Databases `db4_client2` and `db5_client3` would use 2 different independent `BinlogClient` instances. Database `db6_old` will use the old implementation. NOTE: By default `use_binlog_client` is disabled. 
Setting `max_bytes_in_binlog_queue` defines the max allowed bytes in the binlog queue. By default, it is `1073741824` bytes. If number of bytes exceeds this limit, the dispatching will be stopped until the space will be freed for new events. + +## Binlog Table Structure + +To see the status of the all `BinlogClient` instances there is `system.mysql_binlogs` system table. It shows the list of all created and _alive_ `IBinlog` instances with information about its `BinlogEventsDispatcher` and `BinlogClient`. + +Example: + +``` +SELECT * FROM system.mysql_binlogs FORMAT Vertical +Row 1: +────── +binlog_client_name: root@127.0.0.1:3306 +name: test_Clickhouse1 +mysql_binlog_name: binlog.001154 +mysql_binlog_pos: 7142294 +mysql_binlog_timestamp: 1660082447 +mysql_binlog_executed_gtid_set: a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30523304 +dispatcher_name: Applier +dispatcher_mysql_binlog_name: binlog.001154 +dispatcher_mysql_binlog_pos: 7142294 +dispatcher_mysql_binlog_timestamp: 1660082447 +dispatcher_mysql_binlog_executed_gtid_set: a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30523304 +size: 0 +bytes: 0 +max_bytes: 0 +``` + +### Tests + +Unit tests: + +``` +$ ./unit_tests_dbms --gtest_filter=MySQLBinlog.* +``` + +Integration tests: + +``` +$ pytest -s -vv test_materialized_mysql_database/test.py::test_binlog_client +``` + +Dumps events from the file + +``` +$ ./utils/check-mysql-binlog/check-mysql-binlog --binlog binlog.001392 +``` + +Dumps events from the server + +``` +$ ./utils/check-mysql-binlog/check-mysql-binlog --host 127.0.0.1 --port 3306 --user root --password pass --gtid a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30462856 +``` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index d4ee8106320..f085fe1abcd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4773,6 +4773,45 @@ Type: Int64 Default: 0 +## enable_deflate_qpl_codec {#enable_deflate_qpl_codec} + +If turned on, the DEFLATE_QPL codec may be used to compress columns. + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Type: Bool + +## enable_zstd_qat_codec {#enable_zstd_qat_codec} + +If turned on, the ZSTD_QAT codec may be used to compress columns. + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Type: Bool + +## output_format_compression_level + +Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when writing to table functions `file`, `url`, `hdfs`, `s3`, or `azureBlobStorage`. + +Possible values: from `1` to `22` + +Default: `3` + + +## output_format_compression_zstd_window_log + +Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression. This can help to achieve a better compression ratio. + +Possible values: non-negative numbers. Note that if the value is too small or too big, `zstdlib` will throw an exception. Typical values are from `20` (window size = `1MB`) to `30` (window size = `1GB`). + +Default: `0` + ## rewrite_count_distinct_if_with_count_distinct_implementation Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#count_distinct_implementation) setting. @@ -5157,4 +5196,4 @@ The value 0 means that you can delete all tables without any restrictions. 
:::note This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop) -::: \ No newline at end of file +::: diff --git a/docs/en/operations/system-tables/dropped_tables_parts.md b/docs/en/operations/system-tables/dropped_tables_parts.md new file mode 100644 index 00000000000..095f35287fe --- /dev/null +++ b/docs/en/operations/system-tables/dropped_tables_parts.md @@ -0,0 +1,14 @@ +--- +slug: /en/operations/system-tables/dropped_tables_parts +--- +# dropped_tables_parts {#system_tables-dropped_tables_parts} + +Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) dropped tables from [system.dropped_tables](./dropped_tables.md) + +The schema of this table is the same as [system.parts](./parts.md) + +**See Also** + +- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) +- [system.parts](./parts.md) +- [system.dropped_tables](./dropped_tables.md) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 7fcc4928355..d48eb31df00 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -42,7 +42,7 @@ Columns: - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time. -- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time with microseconds precision. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Query starting time with microseconds precision. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. - `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index 6ac1ddbf667..0c39499e190 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -10,7 +10,7 @@ Columns: - `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` (Date) — Date of the entry. - `event_time` (DateTime) — Time of the entry. -- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision. +- `event_time_microseconds` (DateTime64) — Time of the entry with microseconds precision. - `microseconds` (UInt32) — Microseconds of the entry. - `thread_name` (String) — Name of the thread from which the logging was done. - `thread_id` (UInt64) — OS thread ID. diff --git a/docs/en/operations/utilities/clickhouse-format.md b/docs/en/operations/utilities/clickhouse-format.md index 3e4295598aa..879bf9d71ac 100644 --- a/docs/en/operations/utilities/clickhouse-format.md +++ b/docs/en/operations/utilities/clickhouse-format.md @@ -11,6 +11,8 @@ Keys: - `--query` — Format queries of any length and complexity. - `--hilite` — Add syntax highlight with ANSI terminal escape sequences. 
- `--oneline` — Format in single line. +- `--max_line_length` — Format in single line queries with length less than specified. +- `--comments` — Keep comments in the output. - `--quiet` or `-q` — Just check syntax, no output on success. - `--multiquery` or `-n` — Allow multiple queries in the same file. - `--obfuscate` — Obfuscate instead of formatting. diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index d6e11fb9613..4588f68cacd 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -24,7 +24,7 @@ A client application to interact with clickhouse-keeper by its native protocol. ## Example {#clickhouse-keeper-client-example} ```bash -./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 +./clickhouse-keeper-client -h localhost -p 9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 Connected to ZooKeeper at [::1]:9181 with session_id 137 / :) ls keeper foo bar diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 42a1ca5aaac..1adff18f598 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -18,6 +18,12 @@ Supported range of values: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\]. Resolution: 1 second. +## Speed + +The `Date` datatype is faster than `DateTime` under _most_ conditions. + +The `Date` type requires 2 bytes of storage, while `DateTime` requires 4. However, when the database compresses the database, this difference is amplified. This amplification is due to the minutes and seconds in `DateTime` being less compressible. Filtering and aggregating `Date` instead of `DateTime` is also faster. + ## Usage Remarks The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how the values of the `DateTime` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01’). diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index f5da00a8663..1639f45e66c 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -6,7 +6,7 @@ sidebar_label: Arrays # Array Functions -## empty +## empty {#empty} Checks whether the input array is empty. @@ -50,7 +50,7 @@ Result: └────────────────┘ ``` -## notEmpty +## notEmpty {#notempty} Checks whether the input array is non-empty. @@ -221,7 +221,7 @@ SELECT has([1, 2, NULL], NULL) └─────────────────────────┘ ``` -## hasAll +## hasAll {#hasall} Checks whether one array is a subset of another. @@ -261,7 +261,7 @@ Raises an exception `NO_COMMON_TYPE` if the set and subset elements do not share `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [3, 5]])` returns 0. -## hasAny +## hasAny {#hasany} Checks whether two arrays have intersection by some elements. 
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 2c6a468af0e..90c7d8c2206 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1777,34 +1777,67 @@ Result: └────────────────────────────────────────────────────────────────────────┘ ``` -## sqid +## sqidEncode -Transforms numbers into a [Sqid](https://sqids.org/) which is a YouTube-like ID string. +Encodes numbers as a [Sqid](https://sqids.org/) which is a YouTube-like ID string. The output alphabet is `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`. -Do not use this function for hashing - the generated IDs can be decoded back into numbers. +Do not use this function for hashing - the generated IDs can be decoded back into the original numbers. **Syntax** ```sql -sqid(number1, ...) +sqidEncode(number1, ...) ``` +Alias: `sqid` + **Arguments** - A variable number of UInt8, UInt16, UInt32 or UInt64 numbers. **Returned Value** -A hash id [String](/docs/en/sql-reference/data-types/string.md). +A sqid [String](/docs/en/sql-reference/data-types/string.md). **Example** ```sql -SELECT sqid(1, 2, 3, 4, 5); +SELECT sqidEncode(1, 2, 3, 4, 5); ``` ```response -┌─sqid(1, 2, 3, 4, 5)─┐ -│ gXHfJ1C6dN │ -└─────────────────────┘ +┌─sqidEncode(1, 2, 3, 4, 5)─┐ +│ gXHfJ1C6dN │ +└───────────────────────────┘ +``` + +## sqidDecode + +Decodes a [Sqid](https://sqids.org/) back into its original numbers. +Returns an empty array in case the input string is not a valid sqid. + +**Syntax** + +```sql +sqidDecode(sqid) +``` + +**Arguments** + +- A sqid - [String](/docs/en/sql-reference/data-types/string.md) + +**Returned Value** + +The sqid transformed to numbers [Array(UInt64)](/docs/en/sql-reference/data-types/array.md). + +**Example** + +```sql +SELECT sqidDecode('gXHfJ1C6dN'); +``` + +```response +┌─sqidDecode('gXHfJ1C6dN')─┐ +│ [1,2,3,4,5] │ +└──────────────────────────┘ ``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 20694211912..a2f1b0d7752 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -731,7 +731,7 @@ Alias: `FROM_BASE64`. Like `base64Decode` but returns an empty string in case of error. -## endsWith +## endsWith {#endswith} Returns whether string `str` ends with `suffix`. @@ -765,7 +765,7 @@ Result: └──────────────────────────┴──────────────────────┘ ``` -## startsWith +## startsWith {#startswith} Returns whether string `str` starts with `prefix`. @@ -1383,6 +1383,148 @@ Result: └──────────────────┘ ``` +## punycodeEncode + +Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) representation of a string. +The string must be UTF8-encoded, otherwise the behavior is undefined. + +**Syntax** + +``` sql +punycodeEncode(val) +``` + +**Arguments** + +- `val` - Input value. [String](../data-types/string.md) + +**Returned value** + +- A Punycode representation of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select punycodeEncode('München'); +``` + +Result: + +```result +┌─punycodeEncode('München')─┐ +│ Mnchen-3ya │ +└───────────────────────────┘ +``` + +## punycodeDecode + +Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string. +If no valid Punycode-encoded string is given, an exception is thrown. 
+ +**Syntax** + +``` sql +punycodeDecode(val) +``` + +**Arguments** + +- `val` - Punycode-encoded string. [String](../data-types/string.md) + +**Returned value** + +- The plaintext of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select punycodeDecode('Mnchen-3ya'); +``` + +Result: + +```result +┌─punycodeDecode('Mnchen-3ya')─┐ +│ München │ +└──────────────────────────────┘ +``` + +## tryPunycodeDecode + +Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded string is given. + +## idnaEncode + +Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +The input string must be UTF8-encoded and translatable to an ASCII string, otherwise an exception is thrown. +Note: No percent decoding or trimming of tabs, spaces or control characters is performed. + +**Syntax** + +```sql +idnaEncode(val) +``` + +**Arguments** + +- `val` - Input value. [String](../data-types/string.md) + +**Returned value** + +- An ASCII representation according to the IDNA mechanism of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select idnaEncode('straße.münchen.de'); +``` + +Result: + +```result +┌─idnaEncode('straße.münchen.de')─────┐ +│ xn--strae-oqa.xn--mnchen-3ya.de │ +└─────────────────────────────────────┘ +``` + +## tryIdnaEncode + +Like `idnaEncode` but returns an empty string in case of an error instead of throwing an exception. + +## idnaDecode + +Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +In case of an error (e.g. because the input is invalid), the input string is returned. +Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization. + +**Syntax** + +```sql +idnaDecode(val) +``` + +**Arguments** + +- `val` - Input value. [String](../data-types/string.md) + +**Returned value** + +- A Unicode (UTF-8) representation according to the IDNA mechanism of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de'); +``` + +Result: + +```result +┌─idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de')─┐ +│ straße.münchen.de │ +└───────────────────────────────────────────────┘ +``` + ## byteHammingDistance Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings. @@ -1463,6 +1605,78 @@ Result: Alias: levenshteinDistance +## damerauLevenshteinDistance + +Calculates the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) between two byte strings. + +**Syntax** + +```sql +damerauLevenshteinDistance(string1, string2) +``` + +**Examples** + +``` sql +SELECT damerauLevenshteinDistance('clickhouse', 'mouse'); +``` + +Result: + +``` text +┌─damerauLevenshteinDistance('clickhouse', 'mouse')─┐ +│ 6 │ +└───────────────────────────────────────────────────┘ +``` + +## jaroSimilarity + +Calculates the [Jaro similarity](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro_similarity) between two byte strings.
+ +**Syntax** + +```sql +jaroSimilarity(string1, string2) +``` + +**Examples** + +``` sql +SELECT jaroSimilarity('clickhouse', 'click'); +``` + +Result: + +``` text +┌─jaroSimilarity('clickhouse', 'click')─┐ +│ 0.8333333333333333 │ +└───────────────────────────────────────┘ +``` + +## jaroWinklerSimilarity + +Calculates the [Jaro-Winkler similarity](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro%E2%80%93Winkler_similarity) between two byte strings. + +**Syntax** + +```sql +jaroWinklerSimilarity(string1, string2) +``` + +**Examples** + +``` sql +SELECT jaroWinklerSimilarity('clickhouse', 'click'); +``` + +Result: + +``` text +┌─jaroWinklerSimilarity('clickhouse', 'click')─┐ +│ 0.8999999999999999 │ +└──────────────────────────────────────────────┘ +``` + ## initcap Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 1cb71e6f35d..d5dbca3f2b7 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -207,7 +207,7 @@ Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` an multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) ``` -## multiSearchAny +## multiSearchAny {#multisearchany} Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. @@ -219,7 +219,7 @@ Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearch multiSearchAny(haystack, \[needle1, needle2, …, needlen\]) ``` -## match +## match {#match} Returns whether string `haystack` matches the regular expression `pattern` in [re2 regular syntax](https://github.com/google/re2/wiki/Syntax). @@ -414,7 +414,7 @@ Result: └────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## like +## like {#like} Returns whether string `haystack` matches the LIKE expression `pattern`. @@ -445,7 +445,7 @@ like(haystack, pattern) Alias: `haystack LIKE pattern` (operator) -## notLike +## notLike {#notlike} Like `like` but negates the result. diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index 434432baa48..144d832b36a 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -57,3 +57,56 @@ Result: │ 6 │ └─────────┘ ``` + +## seriesDecomposeSTL + +Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into seasonal, trend, and residual components. + +**Syntax** + +``` sql +seriesDecomposeSTL(series, period); +``` + +**Arguments** + +- `series` - An array of numeric values +- `period` - A positive integer + +The number of data points in `series` should be at least twice the value of `period`. + +**Returned value** + +- An array of three arrays, where the first array contains the seasonal component, the second array the trend component, +and the third array the residual component. + +Type: [Array](../../sql-reference/data-types/array.md).
+ +**Examples** + +Query: + +``` sql +SELECT seriesDecomposeSTL([10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34], 3) AS print_0; +``` + +Result: + +``` text +┌───────────print_0──────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [[ + -13.529999, -3.1799996, 16.71, -13.53, -3.1799996, 16.71, -13.53, -3.1799996, + 16.71, -13.530001, -3.18, 16.710001, -13.530001, -3.1800003, 16.710001, -13.530001, + -3.1800003, 16.710001, -13.530001, -3.1799994, 16.71, -13.529999, -3.1799994, 16.709997 + ], + [ + 23.63, 23.63, 23.630003, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001, + 23.630001, 23.630001, 23.630001, 23.63, 23.630001, 23.630001, 23.63, 23.630001, + 23.630001, 23.63, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001, 23.630003 + ], + [ + 0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0, + 0 + ]] │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 602feb69d8a..7322bc17b76 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -293,6 +293,8 @@ You can't combine both ways in one query. Along with columns descriptions constraints could be defined: +### CONSTRAINT + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( @@ -307,6 +309,30 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Adding large amount of constraints can negatively affect performance of big `INSERT` queries. +### ASSUME + +The `ASSUME` clause is used to define a `CONSTRAINT` on a table that is assumed to be true. This constraint can then be used by the optimizer to enhance the performance of SQL queries. + +Take this example where `ASSUME CONSTRAINT` is used in the creation of the `users_a` table: + +```sql +CREATE TABLE users_a ( + uid Int16, + name String, + age Int16, + name_len UInt8 MATERIALIZED length(name), + CONSTRAINT c1 ASSUME length(name) = name_len +) +ENGINE=MergeTree +ORDER BY (name_len, name); +``` + +Here, `ASSUME CONSTRAINT` is used to assert that the `length(name)` function always equals the value of the `name_len` column. This means that whenever `length(name)` is called in a query, ClickHouse can replace it with `name_len`, which should be faster because it avoids calling the `length()` function. + +Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5;`, ClickHouse can optimize it to `SELECT name FROM users_a WHERE name_len < 5`; because of the `ASSUME CONSTRAINT`. This can make the query run faster because it avoids calculating the length of `name` for each row. + +`ASSUME CONSTRAINT` **does not enforce the constraint**, it merely informs the optimizer that the constraint holds true. If the constraint is not actually true, the results of the queries may be incorrect. Therefore, you should only use `ASSUME CONSTRAINT` if you are sure that the constraint is true. + ## TTL Expression Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). 
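To make the `ASSUME` discussion above concrete, here is a hedged usage sketch that restates the rewrite described in the prose. Both queries are taken from the section itself; the `optimize_substitute_columns` setting named in the comment is an assumption about which optimizer switch controls the substitution and should be verified for your ClickHouse version.

```sql
-- Query as written by the user:
SELECT name FROM users_a WHERE length(name) < 5;

-- Conceptual equivalent the optimizer may execute thanks to CONSTRAINT c1
-- (whether the substitution happens can depend on settings such as
-- optimize_substitute_columns — an assumption, not stated in this section).
SELECT name FROM users_a WHERE name_len < 5;
```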
@@ -372,15 +398,23 @@ ClickHouse supports general purpose codecs and specialized codecs. #### ZSTD -`ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: \[1, 22\]. Default value: 1. +`ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: \[1, 22\]. Default level: 1. High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage. +#### ZSTD_QAT + +`ZSTD_QAT[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable level, implemented by [Intel® QATlib](https://github.com/intel/qatlib) and [Intel® QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Possible levels: \[1, 12\]. Default level: 1. Recommended level range: \[6, 12\]. Some limitations apply: + +- ZSTD_QAT is disabled by default and can only be used after enabling configuration setting [enable_zstd_qat_codec](../../../operations/settings/settings.md#enable_zstd_qat_codec). +- For compression, ZSTD_QAT tries to use an Intel® QAT offloading device ([QuickAssist Technology](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)). If no such device is found, it falls back to ZSTD compression in software. +- Decompression is always performed in software. + #### DEFLATE_QPL `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: -- DEFLATE_QPL is disabled by default and can only be used after setting configuration parameter `enable_deflate_qpl_codec = 1`. +- DEFLATE_QPL is disabled by default and can only be used after enabling configuration setting [enable_deflate_qpl_codec](../../../operations/settings/settings.md#enable_deflate_qpl_codec). - DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. - DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. - DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled. diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index 9045ec4aba3..b3e34b9c4bb 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -11,7 +11,7 @@ Its name comes from the fact that it can be looked at as executing `JOIN` with a Syntax: -``` sql +```sql SELECT FROM [LEFT] ARRAY JOIN @@ -30,7 +30,7 @@ Supported types of `ARRAY JOIN` are listed below: The examples below demonstrate the usage of the `ARRAY JOIN` and `LEFT ARRAY JOIN` clauses.
Let’s create a table with an [Array](../../../sql-reference/data-types/array.md) type column and insert values into it: -``` sql +```sql CREATE TABLE arrays_test ( s String, @@ -41,7 +41,7 @@ INSERT INTO arrays_test VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []); ``` -``` text +```response ┌─s───────────┬─arr─────┐ │ Hello │ [1,2] │ │ World │ [3,4,5] │ @@ -51,13 +51,13 @@ VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []); The example below uses the `ARRAY JOIN` clause: -``` sql +```sql SELECT s, arr FROM arrays_test ARRAY JOIN arr; ``` -``` text +```response ┌─s─────┬─arr─┐ │ Hello │ 1 │ │ Hello │ 2 │ @@ -69,13 +69,13 @@ ARRAY JOIN arr; The next example uses the `LEFT ARRAY JOIN` clause: -``` sql +```sql SELECT s, arr FROM arrays_test LEFT ARRAY JOIN arr; ``` -``` text +```response ┌─s───────────┬─arr─┐ │ Hello │ 1 │ │ Hello │ 2 │ @@ -90,13 +90,13 @@ LEFT ARRAY JOIN arr; An alias can be specified for an array in the `ARRAY JOIN` clause. In this case, an array item can be accessed by this alias, but the array itself is accessed by the original name. Example: -``` sql +```sql SELECT s, arr, a FROM arrays_test ARRAY JOIN arr AS a; ``` -``` text +```response ┌─s─────┬─arr─────┬─a─┐ │ Hello │ [1,2] │ 1 │ │ Hello │ [1,2] │ 2 │ @@ -108,13 +108,13 @@ ARRAY JOIN arr AS a; Using aliases, you can perform `ARRAY JOIN` with an external array. For example: -``` sql +```sql SELECT s, arr_external FROM arrays_test ARRAY JOIN [1, 2, 3] AS arr_external; ``` -``` text +```response ┌─s───────────┬─arr_external─┐ │ Hello │ 1 │ │ Hello │ 2 │ @@ -130,13 +130,13 @@ ARRAY JOIN [1, 2, 3] AS arr_external; Multiple arrays can be comma-separated in the `ARRAY JOIN` clause. In this case, `JOIN` is performed with them simultaneously (the direct sum, not the cartesian product). Note that all the arrays must have the same size by default. Example: -``` sql +```sql SELECT s, arr, a, num, mapped FROM arrays_test ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS mapped; ``` -``` text +```response ┌─s─────┬─arr─────┬─a─┬─num─┬─mapped─┐ │ Hello │ [1,2] │ 1 │ 1 │ 2 │ │ Hello │ [1,2] │ 2 │ 2 │ 3 │ @@ -148,13 +148,13 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS ma The example below uses the [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) function: -``` sql +```sql SELECT s, arr, a, num, arrayEnumerate(arr) FROM arrays_test ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num; ``` -``` text +```response ┌─s─────┬─arr─────┬─a─┬─num─┬─arrayEnumerate(arr)─┐ │ Hello │ [1,2] │ 1 │ 1 │ [1,2] │ │ Hello │ [1,2] │ 2 │ 2 │ [1,2] │ @@ -163,6 +163,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num; │ World │ [3,4,5] │ 5 │ 3 │ [1,2,3] │ └───────┴─────────┴───┴─────┴─────────────────────┘ ``` + Multiple arrays with different sizes can be joined by using: `SETTINGS enable_unaligned_array_join = 1`. 
Example: ```sql @@ -171,7 +172,7 @@ FROM arrays_test ARRAY JOIN arr as a, [['a','b'],['c']] as b SETTINGS enable_unaligned_array_join = 1; ``` -```text +```response ┌─s───────┬─arr─────┬─a─┬─b─────────┐ │ Hello │ [1,2] │ 1 │ ['a','b'] │ │ Hello │ [1,2] │ 2 │ ['c'] │ @@ -187,7 +188,7 @@ SETTINGS enable_unaligned_array_join = 1; `ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/index.md): -``` sql +```sql CREATE TABLE nested_test ( s String, @@ -200,7 +201,7 @@ INSERT INTO nested_test VALUES ('Hello', [1,2], [10,20]), ('World', [3,4,5], [30,40,50]), ('Goodbye', [], []); ``` -``` text +```response ┌─s───────┬─nest.x──┬─nest.y─────┐ │ Hello │ [1,2] │ [10,20] │ │ World │ [3,4,5] │ [30,40,50] │ @@ -208,13 +209,13 @@ VALUES ('Hello', [1,2], [10,20]), ('World', [3,4,5], [30,40,50]), ('Goodbye', [] └─────────┴─────────┴────────────┘ ``` -``` sql +```sql SELECT s, `nest.x`, `nest.y` FROM nested_test ARRAY JOIN nest; ``` -``` text +```response ┌─s─────┬─nest.x─┬─nest.y─┐ │ Hello │ 1 │ 10 │ │ Hello │ 2 │ 20 │ @@ -226,13 +227,13 @@ ARRAY JOIN nest; When specifying names of nested data structures in `ARRAY JOIN`, the meaning is the same as `ARRAY JOIN` with all the array elements that it consists of. Examples are listed below: -``` sql +```sql SELECT s, `nest.x`, `nest.y` FROM nested_test ARRAY JOIN `nest.x`, `nest.y`; ``` -``` text +```response ┌─s─────┬─nest.x─┬─nest.y─┐ │ Hello │ 1 │ 10 │ │ Hello │ 2 │ 20 │ @@ -244,13 +245,13 @@ ARRAY JOIN `nest.x`, `nest.y`; This variation also makes sense: -``` sql +```sql SELECT s, `nest.x`, `nest.y` FROM nested_test ARRAY JOIN `nest.x`; ``` -``` text +```response ┌─s─────┬─nest.x─┬─nest.y─────┐ │ Hello │ 1 │ [10,20] │ │ Hello │ 2 │ [10,20] │ @@ -262,13 +263,13 @@ ARRAY JOIN `nest.x`; An alias may be used for a nested data structure, in order to select either the `JOIN` result or the source array. Example: -``` sql +```sql SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y` FROM nested_test ARRAY JOIN nest AS n; ``` -``` text +```response ┌─s─────┬─n.x─┬─n.y─┬─nest.x──┬─nest.y─────┐ │ Hello │ 1 │ 10 │ [1,2] │ [10,20] │ │ Hello │ 2 │ 20 │ [1,2] │ [10,20] │ @@ -280,13 +281,13 @@ ARRAY JOIN nest AS n; Example of using the [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) function: -``` sql +```sql SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num FROM nested_test ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num; ``` -``` text +```response ┌─s─────┬─n.x─┬─n.y─┬─nest.x──┬─nest.y─────┬─num─┐ │ Hello │ 1 │ 10 │ [1,2] │ [10,20] │ 1 │ │ Hello │ 2 │ 20 │ [1,2] │ [10,20] │ 2 │ @@ -300,6 +301,11 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num; The query execution order is optimized when running `ARRAY JOIN`. Although `ARRAY JOIN` must always be specified before the [WHERE](../../../sql-reference/statements/select/where.md)/[PREWHERE](../../../sql-reference/statements/select/prewhere.md) clause in a query, technically they can be performed in any order, unless result of `ARRAY JOIN` is used for filtering. The processing order is controlled by the query optimizer. +### Incompatibility with short-circuit function evaluation + +[Short-circuit function evaluation](../../../operations/settings/index.md#short-circuit-function-evaluation) is a feature that optimizes the execution of complex expressions in specific functions such as `if`, `multiIf`, `and`, and `or`. 
It prevents potential exceptions, such as division by zero, from occurring during the execution of these functions. + +`arrayJoin` is always executed and not supported for short circuit function evaluation. That's because it's a unique function processed separately from all other functions during query analysis and execution and requires additional logic that doesn't work with short circuit function execution. The reason is that the number of rows in the result depends on the arrayJoin result, and it's too complex and expensive to implement lazy execution of `arrayJoin`. ## Related content diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 0529be06b5d..95d0489d694 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -12,7 +12,7 @@ Join produces a new table by combining columns from one or multiple tables by us ``` sql SELECT FROM -[GLOBAL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI|ANY|ASOF] JOIN +[GLOBAL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI|ANY|ALL|ASOF] JOIN (ON )|(USING ) ... ``` @@ -296,6 +296,34 @@ PASTE JOIN │ 1 │ 0 │ └───┴──────┘ ``` +Note: In this case result can be nondeterministic if the reading is parallel. Example: +```SQL +SELECT * +FROM +( + SELECT number AS a + FROM numbers_mt(5) +) AS t1 +PASTE JOIN +( + SELECT number AS a + FROM numbers(10) + ORDER BY a DESC +) AS t2 +SETTINGS max_block_size = 2; + +┌─a─┬─t2.a─┐ +│ 2 │ 9 │ +│ 3 │ 8 │ +└───┴──────┘ +┌─a─┬─t2.a─┐ +│ 0 │ 7 │ +│ 1 │ 6 │ +└───┴──────┘ +┌─a─┬─t2.a─┐ +│ 4 │ 5 │ +└───┴──────┘ +``` ## Distributed JOIN diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 1dfdcb3c745..59505d08f5c 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -1559,7 +1559,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( QueryPipeline input; QueryPipeline output; { - BlockIO io_insert = InterpreterFactory::get(query_insert_ast, context_insert)->execute(); + BlockIO io_insert = InterpreterFactory::instance().get(query_insert_ast, context_insert)->execute(); InterpreterSelectWithUnionQuery select(query_select_ast, context_select, SelectQueryOptions{}); QueryPlan plan; @@ -1944,7 +1944,7 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts, auto local_context = Context::createCopy(context); local_context->setSettings(task_cluster->settings_pull); - auto pipeline = InterpreterFactory::get(query_ast, local_context)->execute().pipeline; + auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline; PullingPipelineExecutor executor(pipeline); Block block; executor.pull(block); @@ -1989,7 +1989,7 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi auto local_context = Context::createCopy(context); local_context->setSettings(task_cluster->settings_pull); - auto pipeline = InterpreterFactory::get(query_ast, local_context)->execute().pipeline; + auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline; PullingPipelineExecutor executor(pipeline); Block result; executor.pull(result); diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index e3371185aad..53f79888573 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -157,6 +158,7 @@ 
void ClusterCopierApp::mainImpl() context->setApplicationType(Context::ApplicationType::LOCAL); context->setPath(process_path + "/"); + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); diff --git a/programs/extract-from-config/ExtractFromConfig.cpp b/programs/extract-from-config/ExtractFromConfig.cpp index eb9385ce55e..7c3e80aa78f 100644 --- a/programs/extract-from-config/ExtractFromConfig.cpp +++ b/programs/extract-from-config/ExtractFromConfig.cpp @@ -17,15 +17,7 @@ #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include static void setupLogging(const std::string & log_level) { diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 05ba86069d7..a1c51565ae3 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -3,16 +3,19 @@ #include #include +#include #include #include #include #include +#include #include #include #include #include #include #include +#include #include #include @@ -29,22 +32,49 @@ #include #include #include +#include +namespace DB::ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +namespace +{ + +void skipSpacesAndComments(const char*& pos, const char* end, bool print_comments) +{ + do + { + /// skip spaces to avoid throw exception after last query + while (pos != end && std::isspace(*pos)) + ++pos; + + const char * comment_begin = pos; + /// for skip comment after the last query and to not throw exception + if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-') + { + pos += 2; + /// skip until the end of the line + while (pos != end && *pos != '\n') + ++pos; + if (print_comments) + std::cout << std::string_view(comment_begin, pos - comment_begin) << "\n"; + } + /// need to parse next sql + else + break; + } while (pos != end); +} + +} + #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wmissing-declarations" extern const char * auto_time_zones[]; - -namespace DB -{ -namespace ErrorCodes -{ -extern const int INVALID_FORMAT_INSERT_QUERY_WITH_DATA; -} -} - int mainEntryClickHouseFormat(int argc, char ** argv) { using namespace DB; @@ -55,8 +85,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv) desc.add_options() ("query", po::value(), "query to format") ("help,h", "produce help message") + ("comments", "keep comments in the output") ("hilite", "add syntax highlight with ANSI terminal escape sequences") ("oneline", "format in single line") + ("max_line_length", po::value()->default_value(0), "format in single line queries with length less than specified") ("quiet,q", "just check syntax, no output on success") ("multiquery,n", "allow multiple queries in the same file") ("obfuscate", "obfuscate instead of formatting") @@ -88,6 +120,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) bool oneline = options.count("oneline"); bool quiet = options.count("quiet"); bool multiple = options.count("multiquery"); + bool print_comments = options.count("comments"); + size_t max_line_length = options["max_line_length"].as(); bool obfuscate = options.count("obfuscate"); bool backslash = options.count("backslash"); bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert"); @@ -104,6 +138,19 @@ int mainEntryClickHouseFormat(int argc, char ** argv) return 2; } + if (oneline && max_line_length) + { + std::cerr << "Options 'oneline' 
and 'max_line_length' are mutually exclusive." << std::endl; + return 2; + } + + if (max_line_length > 255) + { + std::cerr << "Option 'max_line_length' must be less than 256." << std::endl; + return 2; + } + + String query; if (options.count("query")) @@ -124,10 +171,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv) if (options.count("seed")) { - std::string seed; hash_func.update(options["seed"].as()); } + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); @@ -179,30 +226,75 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { const char * pos = query.data(); const char * end = pos + query.size(); + skipSpacesAndComments(pos, end, print_comments); ParserQuery parser(end, allow_settings_after_format_in_insert); - do + while (pos != end) { + size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos; + ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); - /// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure, - /// we should throw an exception early, and make exception message more readable. - if (const auto * insert_query = res->as(); insert_query && insert_query->data) + std::unique_ptr insert_query_payload = nullptr; + /// If the query is INSERT ... VALUES, then we will try to parse the data. + if (auto * insert_query = res->as(); insert_query && insert_query->data) { - throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA, - "Can't format ASTInsertQuery with data, since data will be lost"); + if ("Values" != insert_query->format) + throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Can't format INSERT query with data format '{}'", insert_query->format); + + /// Reset format to default to have `INSERT INTO table VALUES` instead of `INSERT INTO table VALUES FORMAT Values` + insert_query->format = {}; + + /// We assume that data ends with a newline character (same as client does) + const char * this_query_end = find_first_symbols<'\n'>(insert_query->data, end); + insert_query->end = this_query_end; + pos = this_query_end; + insert_query_payload = getReadBufferFromASTInsertQuery(res); } if (!quiet) { if (!backslash) { - WriteBufferFromOStream res_buf(std::cout, 4096); - formatAST(*res, res_buf, hilite, oneline); - res_buf.finalize(); - if (multiple) - std::cout << "\n;\n"; + WriteBufferFromOwnString str_buf; + formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length); + + if (insert_query_payload) + { + str_buf.write(' '); + copyData(*insert_query_payload, str_buf); + } + + String res_string = str_buf.str(); + const char * s_pos = res_string.data(); + const char * s_end = s_pos + res_string.size(); + /// remove trailing spaces + while (s_end > s_pos && isWhitespaceASCIIOneLine(*(s_end - 1))) + --s_end; + WriteBufferFromOStream res_cout(std::cout, 4096); + /// For multiline queries we print ';' at new line, + /// but for single line queries we print ';' at the same line + bool has_multiple_lines = false; + while (s_pos != s_end) + { + if (*s_pos == '\n') + has_multiple_lines = true; + res_cout.write(*s_pos++); + } + res_cout.finalize(); + + if (multiple && !insert_query_payload) + { + if (oneline || !has_multiple_lines) + std::cout << ";\n"; + else + std::cout << "\n;\n"; + } + else if (multiple && insert_query_payload) + /// Do not need to add ; because it's already in the insert_query_payload + std::cout << "\n"; + 
std::cout << std::endl; } /// add additional '\' at the end of each line; @@ -230,27 +322,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv) std::cout << std::endl; } } - - do - { - /// skip spaces to avoid throw exception after last query - while (pos != end && std::isspace(*pos)) - ++pos; - - /// for skip comment after the last query and to not throw exception - if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-') - { - pos += 2; - /// skip until the end of the line - while (pos != end && *pos != '\n') - ++pos; - } - /// need to parse next sql - else - break; - } while (pos != end); - - } while (multiple && pos != end); + skipSpacesAndComments(pos, end, print_comments); + if (!multiple) + break; + } } } catch (...) diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index b9bc6c7e8df..16244232bee 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -24,15 +25,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - static constexpr auto documentation = R"( A tool to extract information from Git repository for analytics. diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index b8f538f821c..ff7c8e75f1c 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -95,6 +95,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index 9642dd7ee63..b0b465460e0 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -2,6 +2,7 @@ #include "CatBoostLibraryHandler.h" #include "CatBoostLibraryHandlerFactory.h" +#include "Common/ProfileEvents.h" #include "ExternalDictionaryLibraryHandler.h" #include "ExternalDictionaryLibraryHandlerFactory.h" @@ -44,7 +45,7 @@ namespace response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - *response.send() << message << std::endl; + *response.send() << message << '\n'; LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message)); } @@ -96,7 +97,7 @@ ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRe } -void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { LOG_TRACE(log, "Request URI: {}", request.getURI()); HTMLForm params(getContext()->getSettingsRef(), request); @@ -384,7 +385,7 @@ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExi } -void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void 
ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { try { @@ -423,7 +424,7 @@ CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler( } -void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { LOG_TRACE(log, "Request URI: {}", request.getURI()); HTMLForm params(getContext()->getSettingsRef(), request); @@ -463,6 +464,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ { if (method == "catboost_list") { + auto & read_buf = request.getStream(); + params.read(read_buf); + ExternalModelInfos model_infos = CatBoostLibraryHandlerFactory::instance().getModelInfos(); writeIntBinary(static_cast(model_infos.size()), out); @@ -500,6 +504,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ } else if (method == "catboost_removeAllModels") { + auto & read_buf = request.getStream(); + params.read(read_buf); + CatBoostLibraryHandlerFactory::instance().removeAllModels(); String res = "1"; @@ -621,7 +628,7 @@ CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t ke } -void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { try { diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 16815e84723..4f08d7a6084 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -20,7 +20,7 @@ class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, public: ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_); - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: static constexpr inline auto FORMAT = "RowBinary"; @@ -36,7 +36,7 @@ class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, public: ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_); - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: const size_t keep_alive_timeout; @@ -65,7 +65,7 @@ class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithConte public: CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_); - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: const size_t keep_alive_timeout; @@ -79,7 +79,7 @@ class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContex public: CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr 
context_); - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: const size_t keep_alive_timeout; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index ccd3d84630f..c9841277b6d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -486,6 +487,7 @@ try Poco::ErrorHandler::set(&error_handler); } + registerInterpreters(); /// Don't initialize DateLUT registerFunctions(); registerAggregateFunctions(); diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 434abf0bf14..774883657b7 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -69,7 +69,7 @@ namespace } -void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { HTMLForm params(getContext()->getSettingsRef(), request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); @@ -78,7 +78,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - *response.send() << message << std::endl; + *response.send() << message << '\n'; LOG_WARNING(log, fmt::runtime(message)); }; diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index 3ba8b182ba6..e3087701182 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -23,7 +23,7 @@ public: { } - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index f622995bf15..a23efb112de 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -21,7 +21,7 @@ namespace DB { -void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { HTMLForm params(getContext()->getSettingsRef(), request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); @@ -30,7 +30,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - *response.send() << message << std::endl; + response.send()->writeln(message); LOG_WARNING(log, fmt::runtime(message)); }; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index d57bbc0ca8a..ff5c02ca07b 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -21,7 +21,7 @@ public: { } - void handleRequest(HTTPServerRequest & 
request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 9130b3e0f47..e350afa2b10 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -46,12 +46,12 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string { response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - *response.send() << message << std::endl; + *response.send() << message << '\n'; LOG_WARNING(log, fmt::runtime(message)); } -void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { HTMLForm params(getContext()->getSettingsRef(), request); LOG_TRACE(log, "Request URI: {}", request.getURI()); diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index bc0fca8b9a5..7977245ff82 100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -30,7 +30,7 @@ public: { } - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/PingHandler.cpp b/programs/odbc-bridge/PingHandler.cpp index e3ab5e5cd00..80d0e2bf4a9 100644 --- a/programs/odbc-bridge/PingHandler.cpp +++ b/programs/odbc-bridge/PingHandler.cpp @@ -6,7 +6,7 @@ namespace DB { -void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response) +void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { try { diff --git a/programs/odbc-bridge/PingHandler.h b/programs/odbc-bridge/PingHandler.h index c969ec55af7..c5447107e0c 100644 --- a/programs/odbc-bridge/PingHandler.h +++ b/programs/odbc-bridge/PingHandler.h @@ -10,7 +10,7 @@ class PingHandler : public HTTPRequestHandler { public: explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: size_t keep_alive_timeout; diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index 020359f51fd..c7025ca4311 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -29,7 +29,7 @@ namespace } -void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { HTMLForm params(getContext()->getSettingsRef(), request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); @@ -38,7 +38,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); 
if (!response.sent()) - *response.send() << message << std::endl; + *response.send() << message << '\n'; LOG_WARNING(log, fmt::runtime(message)); }; diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index cb71a6fb5a2..aa0b04b1d31 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -24,7 +24,7 @@ public: { } - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: Poco::Logger * log; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 1fa3d1cfa73..a996ed6e34c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -152,6 +153,18 @@ namespace ProfileEvents { extern const Event MainConfigLoads; extern const Event ServerStartupMilliseconds; + extern const Event InterfaceNativeSendBytes; + extern const Event InterfaceNativeReceiveBytes; + extern const Event InterfaceHTTPSendBytes; + extern const Event InterfaceHTTPReceiveBytes; + extern const Event InterfacePrometheusSendBytes; + extern const Event InterfacePrometheusReceiveBytes; + extern const Event InterfaceInterserverSendBytes; + extern const Event InterfaceInterserverReceiveBytes; + extern const Event InterfaceMySQLSendBytes; + extern const Event InterfaceMySQLReceiveBytes; + extern const Event InterfacePostgreSQLSendBytes; + extern const Event InterfacePostgreSQLReceiveBytes; } namespace fs = std::filesystem; @@ -646,6 +659,7 @@ try } #endif + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); @@ -2047,7 +2061,7 @@ std::unique_ptr Server::buildProtocolStackFromConfig( auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr { if (type == "tcp") - return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); + return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); if (type == "tls") #if USE_SSL @@ -2059,20 +2073,20 @@ std::unique_ptr Server::buildProtocolStackFromConfig( if (type == "proxy1") return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); if (type == "mysql") - return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this)); + return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); if (type == "postgres") - return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this)); + return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); if (type == "http") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory")) + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes) ); if (type == "prometheus") return TCPServerConnectionFactory::Ptr( - new 
HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory")) + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes) ); if (type == "interserver") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory")) + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes) ); throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); @@ -2205,7 +2219,7 @@ void Server::createServers( port_name, "http://" + address.toString(), std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); }); } @@ -2225,7 +2239,7 @@ void Server::createServers( port_name, "https://" + address.toString(), std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); #else UNUSED(port); throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); @@ -2248,7 +2262,7 @@ void Server::createServers( port_name, "native protocol (tcp): " + address.toString(), std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); @@ -2270,7 +2284,7 @@ void Server::createServers( port_name, "native protocol (tcp) with PROXY: " + address.toString(), std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); @@ -2293,7 +2307,7 @@ void Server::createServers( port_name, "secure native protocol (tcp_secure): " + address.toString(), std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); @@ -2317,7 +2331,7 @@ void Server::createServers( listen_host, port_name, "MySQL compatibility protocol: " + address.toString(), - std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new 
Poco::Net::TCPServerParams)); + std::make_unique(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); }); } @@ -2334,7 +2348,7 @@ void Server::createServers( listen_host, port_name, "PostgreSQL compatibility protocol: " + address.toString(), - std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + std::make_unique(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); }); } @@ -2368,7 +2382,7 @@ void Server::createServers( port_name, "Prometheus: http://" + address.toString(), std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)); }); } } @@ -2414,7 +2428,9 @@ void Server::createInterserverServers( createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, - http_params)); + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); }); } @@ -2437,7 +2453,9 @@ void Server::createInterserverServers( createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, - http_params)); + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); #else UNUSED(port); throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); diff --git a/programs/server/config.xml b/programs/server/config.xml index 1be20c5cad8..e1428b17084 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -713,11 +713,11 @@ For example, if there two users A, B and a row policy is defined only for A, then if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows. By default this setting is false for compatibility with earlier access configurations. 
--> - false + true - false + true - false + true - false + true - false + true 600 diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp index da00546aa10..d54a2d2da6e 100644 --- a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp +++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -12,15 +13,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace fs = std::filesystem; #define EXTRACT_PATH_PATTERN ".*\\/store/(.*)" diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index d50bbdecf90..5de405d9f8f 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -24,20 +24,12 @@ #include #include #include +#include #include #include #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { namespace ErrorCodes diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 463be6a3aea..6bbb9acc0c1 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -200,6 +200,7 @@ enum class AccessType M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \ M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \ M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \ + M(SYSTEM_JEMALLOC, "SYSTEM JEMALLOC PURGE, SYSTEM JEMALLOC ENABLE PROFILE, SYSTEM JEMALLOC DISABLE PROFILE, SYSTEM JEMALLOC FLUSH PROFILE", GLOBAL, SYSTEM) \ M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \ \ M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\ diff --git a/src/Analyzer/ColumnTransformers.h b/src/Analyzer/ColumnTransformers.h index a337dc30099..43f32318fc9 100644 --- a/src/Analyzer/ColumnTransformers.h +++ b/src/Analyzer/ColumnTransformers.h @@ -3,15 +3,7 @@ #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include namespace DB { diff --git a/src/Analyzer/MatcherNode.h b/src/Analyzer/MatcherNode.h index 332a25eb15b..87ee5ee92bb 100644 --- a/src/Analyzer/MatcherNode.h +++ b/src/Analyzer/MatcherNode.h @@ -4,15 +4,7 @@ #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include namespace DB { diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 117e649ac88..7c38ba81c70 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -64,39 +64,43 @@ public: auto lhs_argument_node_type = lhs_argument->getNodeType(); auto rhs_argument_node_type = rhs_argument->getNodeType(); + QueryTreeNodePtr candidate; + if (lhs_argument_node_type == 
QueryTreeNodeType::FUNCTION && rhs_argument_node_type == QueryTreeNodeType::FUNCTION) - tryOptimizeComparisonTupleFunctions(node, lhs_argument, rhs_argument, comparison_function_name); + candidate = tryOptimizeComparisonTupleFunctions(lhs_argument, rhs_argument, comparison_function_name); else if (lhs_argument_node_type == QueryTreeNodeType::FUNCTION && rhs_argument_node_type == QueryTreeNodeType::CONSTANT) - tryOptimizeComparisonTupleFunctionAndConstant(node, lhs_argument, rhs_argument, comparison_function_name); + candidate = tryOptimizeComparisonTupleFunctionAndConstant(lhs_argument, rhs_argument, comparison_function_name); else if (lhs_argument_node_type == QueryTreeNodeType::CONSTANT && rhs_argument_node_type == QueryTreeNodeType::FUNCTION) - tryOptimizeComparisonTupleFunctionAndConstant(node, rhs_argument, lhs_argument, comparison_function_name); + candidate = tryOptimizeComparisonTupleFunctionAndConstant(rhs_argument, lhs_argument, comparison_function_name); + + if (candidate != nullptr && node->getResultType()->equals(*candidate->getResultType())) + node = candidate; } private: - void tryOptimizeComparisonTupleFunctions(QueryTreeNodePtr & node, + QueryTreeNodePtr tryOptimizeComparisonTupleFunctions( const QueryTreeNodePtr & lhs_function_node, const QueryTreeNodePtr & rhs_function_node, const std::string & comparison_function_name) const { const auto & lhs_function_node_typed = lhs_function_node->as(); if (lhs_function_node_typed.getFunctionName() != "tuple") - return; + return {}; const auto & rhs_function_node_typed = rhs_function_node->as(); if (rhs_function_node_typed.getFunctionName() != "tuple") - return; + return {}; const auto & lhs_tuple_function_arguments_nodes = lhs_function_node_typed.getArguments().getNodes(); size_t lhs_tuple_function_arguments_nodes_size = lhs_tuple_function_arguments_nodes.size(); const auto & rhs_tuple_function_arguments_nodes = rhs_function_node_typed.getArguments().getNodes(); if (lhs_tuple_function_arguments_nodes_size != rhs_tuple_function_arguments_nodes.size()) - return; + return {}; if (lhs_tuple_function_arguments_nodes_size == 1) { - node = makeComparisonFunction(lhs_tuple_function_arguments_nodes[0], rhs_tuple_function_arguments_nodes[0], comparison_function_name); - return; + return makeComparisonFunction(lhs_tuple_function_arguments_nodes[0], rhs_tuple_function_arguments_nodes[0], comparison_function_name); } QueryTreeNodes tuple_arguments_equals_functions; @@ -108,45 +112,44 @@ private: tuple_arguments_equals_functions.push_back(std::move(equals_function)); } - node = makeEquivalentTupleComparisonFunction(std::move(tuple_arguments_equals_functions), comparison_function_name); + return makeEquivalentTupleComparisonFunction(std::move(tuple_arguments_equals_functions), comparison_function_name); } - void tryOptimizeComparisonTupleFunctionAndConstant(QueryTreeNodePtr & node, + QueryTreeNodePtr tryOptimizeComparisonTupleFunctionAndConstant( const QueryTreeNodePtr & function_node, const QueryTreeNodePtr & constant_node, const std::string & comparison_function_name) const { const auto & function_node_typed = function_node->as(); if (function_node_typed.getFunctionName() != "tuple") - return; + return {}; auto & constant_node_typed = constant_node->as(); const auto & constant_node_value = constant_node_typed.getValue(); if (constant_node_value.getType() != Field::Types::Which::Tuple) - return; + return {}; const auto & constant_tuple = constant_node_value.get(); const auto & function_arguments_nodes = 
function_node_typed.getArguments().getNodes(); size_t function_arguments_nodes_size = function_arguments_nodes.size(); if (function_arguments_nodes_size != constant_tuple.size()) - return; + return {}; auto constant_node_result_type = constant_node_typed.getResultType(); const auto * tuple_data_type = typeid_cast(constant_node_result_type.get()); if (!tuple_data_type) - return; + return {}; const auto & tuple_data_type_elements = tuple_data_type->getElements(); if (tuple_data_type_elements.size() != function_arguments_nodes_size) - return; + return {}; if (function_arguments_nodes_size == 1) { auto comparison_argument_constant_value = std::make_shared(constant_tuple[0], tuple_data_type_elements[0]); auto comparison_argument_constant_node = std::make_shared(std::move(comparison_argument_constant_value)); - node = makeComparisonFunction(function_arguments_nodes[0], std::move(comparison_argument_constant_node), comparison_function_name); - return; + return makeComparisonFunction(function_arguments_nodes[0], std::move(comparison_argument_constant_node), comparison_function_name); } QueryTreeNodes tuple_arguments_equals_functions; @@ -160,7 +163,7 @@ private: tuple_arguments_equals_functions.push_back(std::move(equals_function)); } - node = makeEquivalentTupleComparisonFunction(std::move(tuple_arguments_equals_functions), comparison_function_name); + return makeEquivalentTupleComparisonFunction(std::move(tuple_arguments_equals_functions), comparison_function_name); } QueryTreeNodePtr makeEquivalentTupleComparisonFunction(QueryTreeNodes tuple_arguments_equals_functions, diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 4b64f6c88b0..07a031fe4e8 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -61,6 +61,8 @@ public: return; auto & count_distinct_argument_column = count_distinct_arguments_nodes[0]; + if (count_distinct_argument_column->getNodeType() != QueryTreeNodeType::COLUMN) + return; auto & count_distinct_argument_column_typed = count_distinct_argument_column->as(); /// Build subquery SELECT count_distinct_argument_column FROM table_expression GROUP BY count_distinct_argument_column diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 8c4bb7e414c..bb8d3d3cc60 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -396,7 +396,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context String backup_name_for_logging = backup_info.toStringForLogging(); String base_backup_name; if (backup_settings.base_backup_info) - base_backup_name = backup_settings.base_backup_info->toString(); + base_backup_name = backup_settings.base_backup_info->toStringForLogging(); try { @@ -750,7 +750,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt String backup_name_for_logging = backup_info.toStringForLogging(); String base_backup_name; if (restore_settings.base_backup_info) - base_backup_name = restore_settings.base_backup_info->toString(); + base_backup_name = restore_settings.base_backup_info->toStringForLogging(); addInfo(restore_id, backup_name_for_logging, base_backup_name, restore_settings.internal, BackupStatus::RESTORING); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 86cb9acd056..083b959c4b6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -551,13 +551,18 @@ endif () target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) if (TARGET 
ch_contrib::qpl) -dbms_target_link_libraries(PUBLIC ch_contrib::qpl) + dbms_target_link_libraries(PUBLIC ch_contrib::qpl) endif () if (TARGET ch_contrib::accel-config) dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) endif () +if (TARGET ch_contrib::qatzstd_plugin) + dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin) +endif () + target_link_libraries(clickhouse_common_io PUBLIC boost::context) dbms_target_link_libraries(PUBLIC boost::context) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 75ca66f2647..352d2a53892 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -651,7 +651,13 @@ void Connection::sendQuery( if (method == "ZSTD") level = settings->network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_deflate_qpl_codec); + CompressionCodecFactory::instance().validateCodec( + method, + level, + !settings->allow_suspicious_codecs, + settings->allow_experimental_codecs, + settings->enable_deflate_qpl_codec, + settings->enable_zstd_qat_codec); compression_codec = CompressionCodecFactory::instance().get(method, level); } else diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 4406114a955..43166659b18 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -118,18 +118,18 @@ ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const return result; } -std::vector ConnectionPoolWithFailover::getMany(const ConnectionTimeouts & timeouts, - const Settings & settings, - PoolMode pool_mode, - AsyncCallback async_callback, - std::optional skip_unavailable_endpoints) +std::vector ConnectionPoolWithFailover::getMany( + const ConnectionTimeouts & timeouts, + const Settings & settings, + PoolMode pool_mode, + AsyncCallback async_callback, + std::optional skip_unavailable_endpoints, + GetPriorityForLoadBalancing::Func priority_func) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) - { - return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback); - }; + { return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback); }; - std::vector results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints); + std::vector results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func); std::vector entries; entries.reserve(results.size()); @@ -153,17 +153,17 @@ std::vector ConnectionPoolWithFailover::g std::vector ConnectionPoolWithFailover::getManyChecked( const ConnectionTimeouts & timeouts, - const Settings & settings, PoolMode pool_mode, + const Settings & settings, + PoolMode pool_mode, const QualifiedTableName & table_to_check, AsyncCallback async_callback, - std::optional skip_unavailable_endpoints) + std::optional skip_unavailable_endpoints, + GetPriorityForLoadBalancing::Func priority_func) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) - { - return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback); - }; + { return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback); }; - return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints); + return 
getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func); } ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings & settings) @@ -175,14 +175,16 @@ ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::ma } std::vector ConnectionPoolWithFailover::getManyImpl( - const Settings & settings, - PoolMode pool_mode, - const TryGetEntryFunc & try_get_entry, - std::optional skip_unavailable_endpoints) + const Settings & settings, + PoolMode pool_mode, + const TryGetEntryFunc & try_get_entry, + std::optional skip_unavailable_endpoints, + GetPriorityForLoadBalancing::Func priority_func) { if (nested_pools.empty()) - throw DB::Exception(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED, - "Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty"); + throw DB::Exception( + DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED, + "Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty"); if (!skip_unavailable_endpoints.has_value()) skip_unavailable_endpoints = settings.skip_unavailable_shards; @@ -203,14 +205,13 @@ std::vector ConnectionPoolWithFailover::g else throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode"); - GetPriorityFunc get_priority = makeGetPriorityFunc(settings); + if (!priority_func) + priority_func = makeGetPriorityFunc(settings); UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value; bool fallback_to_stale_replicas = settings.fallback_to_stale_replicas_for_distributed_queries.value; - return Base::getMany(min_entries, max_entries, max_tries, - max_ignored_errors, fallback_to_stale_replicas, - try_get_entry, get_priority); + return Base::getMany(min_entries, max_entries, max_tries, max_ignored_errors, fallback_to_stale_replicas, try_get_entry, priority_func); } ConnectionPoolWithFailover::TryResult @@ -251,11 +252,14 @@ ConnectionPoolWithFailover::tryGetEntry( return result; } -std::vector ConnectionPoolWithFailover::getShuffledPools(const Settings & settings) +std::vector +ConnectionPoolWithFailover::getShuffledPools(const Settings & settings, GetPriorityForLoadBalancing::Func priority_func) { - GetPriorityFunc get_priority = makeGetPriorityFunc(settings); + if (!priority_func) + priority_func = makeGetPriorityFunc(settings); + UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value; - return Base::getShuffledPools(max_ignored_errors, get_priority); + return Base::getShuffledPools(max_ignored_errors, priority_func); } } diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 208a003edb8..eaef717a2d6 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -54,10 +54,13 @@ public: /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. */ - std::vector getMany(const ConnectionTimeouts & timeouts, - const Settings & settings, PoolMode pool_mode, - AsyncCallback async_callback = {}, - std::optional skip_unavailable_endpoints = std::nullopt); + std::vector getMany( + const ConnectionTimeouts & timeouts, + const Settings & settings, + PoolMode pool_mode, + AsyncCallback async_callback = {}, + std::optional skip_unavailable_endpoints = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); /// The same as getMany(), but return std::vector. 
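Note on the new priority_func parameter threaded through getMany()/getManyChecked()/getShuffledPools(): it lets a caller override the priority that would otherwise be derived from the load_balancing setting. The following is a minimal, self-contained sketch, not the ClickHouse implementation; the names (Priority, Func, preferred_replica) are stand-ins, and only the std::function<Priority(size_t)> shape mirrors GetPriorityForLoadBalancing::Func. It shows how such a function orders replicas, e.g. to pin work to one replica.

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

/// Assumption: lower value means higher priority, as the surrounding code uses it.
struct Priority { long long value = 0; };

using Func = std::function<Priority(std::size_t index)>;

int main()
{
    const std::size_t pool_size = 4;
    const std::size_t preferred_replica = 2;  /// hypothetical choice made by the caller

    /// Analogue of the optional priority_func argument added above.
    Func priority_func = [=](std::size_t i) { return Priority{i == preferred_replica ? 0 : 1}; };

    std::vector<std::size_t> order(pool_size);
    for (std::size_t i = 0; i < pool_size; ++i)
        order[i] = i;

    std::stable_sort(order.begin(), order.end(),
        [&](std::size_t a, std::size_t b) { return priority_func(a).value < priority_func(b).value; });

    for (std::size_t i : order)
        std::cout << "replica " << i << '\n';  /// the preferred replica is tried first
}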
std::vector getManyForTableFunction(const ConnectionTimeouts & timeouts, @@ -69,12 +72,13 @@ public: /// The same as getMany(), but check that replication delay for table_to_check is acceptable. /// Delay threshold is taken from settings. std::vector getManyChecked( - const ConnectionTimeouts & timeouts, - const Settings & settings, - PoolMode pool_mode, - const QualifiedTableName & table_to_check, - AsyncCallback async_callback = {}, - std::optional skip_unavailable_endpoints = std::nullopt); + const ConnectionTimeouts & timeouts, + const Settings & settings, + PoolMode pool_mode, + const QualifiedTableName & table_to_check, + AsyncCallback async_callback = {}, + std::optional skip_unavailable_endpoints = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); struct NestedPoolStatus { @@ -87,7 +91,7 @@ public: using Status = std::vector; Status getStatus() const; - std::vector getShuffledPools(const Settings & settings); + std::vector getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {}); size_t getMaxErrorCup() const { return Base::max_error_cap; } @@ -96,13 +100,16 @@ public: Base::updateSharedErrorCounts(shuffled_pools); } + size_t getPoolSize() const { return Base::getPoolSize(); } + private: /// Get the values of relevant settings and call Base::getMany() std::vector getManyImpl( - const Settings & settings, - PoolMode pool_mode, - const TryGetEntryFunc & try_get_entry, - std::optional skip_unavailable_endpoints = std::nullopt); + const Settings & settings, + PoolMode pool_mode, + const TryGetEntryFunc & try_get_entry, + std::optional skip_unavailable_endpoints = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); /// Try to get a connection from the pool and check that it is good. /// If table_to_check is not null and the check is enabled in settings, check that replication delay @@ -115,7 +122,7 @@ private: const QualifiedTableName * table_to_check = nullptr, AsyncCallback async_callback = {}); - GetPriorityFunc makeGetPriorityFunc(const Settings & settings); + GetPriorityForLoadBalancing::Func makeGetPriorityFunc(const Settings & settings); GetPriorityForLoadBalancing get_priority_load_balancing; }; diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 0c69d7712ea..7ea13a7dffc 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -28,16 +28,18 @@ HedgedConnections::HedgedConnections( const ThrottlerPtr & throttler_, PoolMode pool_mode, std::shared_ptr table_to_check_, - AsyncCallback async_callback) + AsyncCallback async_callback, + GetPriorityForLoadBalancing::Func priority_func) : hedged_connections_factory( - pool_, - context_->getSettingsRef(), - timeouts_, - context_->getSettingsRef().connections_with_failover_max_tries.value, - context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value, - context_->getSettingsRef().max_parallel_replicas.value, - context_->getSettingsRef().skip_unavailable_shards.value, - table_to_check_) + pool_, + context_->getSettingsRef(), + timeouts_, + context_->getSettingsRef().connections_with_failover_max_tries.value, + context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value, + context_->getSettingsRef().max_parallel_replicas.value, + context_->getSettingsRef().skip_unavailable_shards.value, + table_to_check_, + priority_func) , context(std::move(context_)) , settings(context->getSettingsRef()) , throttler(throttler_) diff --git a/src/Client/HedgedConnections.h 
b/src/Client/HedgedConnections.h index ccdc59965e2..5bc274332db 100644 --- a/src/Client/HedgedConnections.h +++ b/src/Client/HedgedConnections.h @@ -70,13 +70,15 @@ public: size_t index; }; - HedgedConnections(const ConnectionPoolWithFailoverPtr & pool_, - ContextPtr context_, - const ConnectionTimeouts & timeouts_, - const ThrottlerPtr & throttler, - PoolMode pool_mode, - std::shared_ptr table_to_check_ = nullptr, - AsyncCallback async_callback = {}); + HedgedConnections( + const ConnectionPoolWithFailoverPtr & pool_, + ContextPtr context_, + const ConnectionTimeouts & timeouts_, + const ThrottlerPtr & throttler, + PoolMode pool_mode, + std::shared_ptr table_to_check_ = nullptr, + AsyncCallback async_callback = {}, + GetPriorityForLoadBalancing::Func priority_func = {}); void sendScalarsData(Scalars & data) override; diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 6ac504772e2..f7b5ceedc96 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -29,7 +29,8 @@ HedgedConnectionsFactory::HedgedConnectionsFactory( bool fallback_to_stale_replicas_, UInt64 max_parallel_replicas_, bool skip_unavailable_shards_, - std::shared_ptr table_to_check_) + std::shared_ptr table_to_check_, + GetPriorityForLoadBalancing::Func priority_func) : pool(pool_) , timeouts(timeouts_) , table_to_check(table_to_check_) @@ -39,7 +40,7 @@ HedgedConnectionsFactory::HedgedConnectionsFactory( , max_parallel_replicas(max_parallel_replicas_) , skip_unavailable_shards(skip_unavailable_shards_) { - shuffled_pools = pool->getShuffledPools(settings_); + shuffled_pools = pool->getShuffledPools(settings_, priority_func); for (auto shuffled_pool : shuffled_pools) replicas.emplace_back(std::make_unique(shuffled_pool.pool, &timeouts, settings_, log, table_to_check.get())); } @@ -323,8 +324,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect else { ShuffledPool & shuffled_pool = shuffled_pools[index]; - LOG_WARNING( - log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); + LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1); diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index e41ac9767a5..f187e9b2abb 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -53,7 +53,8 @@ public: bool fallback_to_stale_replicas_, UInt64 max_parallel_replicas_, bool skip_unavailable_shards_, - std::shared_ptr table_to_check_ = nullptr); + std::shared_ptr table_to_check_ = nullptr, + GetPriorityForLoadBalancing::Func priority_func = {}); /// Create and return active connections according to pool_mode. 
std::vector getManyConnections(PoolMode pool_mode, AsyncCallback async_callback = {}); diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index c4137920395..24a96c1c85b 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -48,11 +48,11 @@ void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_) return; auto [buf, len] = adjustToPageSize(buf_, len_, page_size); - if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0) + if (::madvise(buf, len, MADV_POPULATE_WRITE) < 0) LOG_TRACE( LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1), "Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)", - errnoToString(res)); + errnoToString(errno)); #endif } diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 962adb8b052..0e0fa25e7a1 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -43,6 +43,19 @@ void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, Atomic } } +void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel) +{ + cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' -> {}", + self->name, + getExceptionMessage(dependency->exception(), /* with_stacktrace = */ false))); +} + +void ignoreDependencyFailure(const LoadJobPtr &, const LoadJobPtr &, std::exception_ptr &) +{ + // No-op +} + LoadStatus LoadJob::status() const { std::unique_lock lock{mutex}; @@ -96,7 +109,10 @@ size_t LoadJob::canceled(const std::exception_ptr & ptr) size_t LoadJob::finish() { - func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` return + // To ensure functions are destructed before `AsyncLoader::wait()` return + func = {}; + dependency_failure = {}; + finish_time = std::chrono::system_clock::now(); if (waiters > 0) finished.notify_all(); @@ -327,17 +343,19 @@ void AsyncLoader::schedule(const LoadJobSet & jobs_to_schedule) if (dep_status == LoadStatus::FAILED || dep_status == LoadStatus::CANCELED) { - // Dependency on already failed or canceled job -- it's okay. Cancel all dependent jobs. - std::exception_ptr e; + // Dependency on already failed or canceled job -- it's okay. + // Process as usual (may lead to cancel of all dependent jobs). 
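Side note on the Allocator.cpp hunk above: madvise() returns -1 on failure and reports the cause via errno, so formatting the return value (as the old code did) always printed "-1". A small standalone sketch of the corrected pattern, assuming Linux; MADV_POPULATE_WRITE needs kernel >= 5.14, so EINVAL is the expected failure on older kernels.

#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sys/mman.h>

int main()
{
    const std::size_t len = 1 << 20;                 /// one page-aligned MiB
    void * buf = std::aligned_alloc(4096, len);
    if (!buf)
        return 1;

#ifdef MADV_POPULATE_WRITE
    /// Report errno, not the return value of madvise().
    if (::madvise(buf, len, MADV_POPULATE_WRITE) < 0)
        std::fprintf(stderr, "madvise failed: %s (EINVAL is expected for kernels < 5.14)\n", std::strerror(errno));
#endif

    std::free(buf);
    return 0;
}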
+ std::exception_ptr cancel; NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, - "Load job '{}' -> {}", - job->name, - getExceptionMessage(dep->exception(), /* with_stacktrace = */ false))); + if (job->dependency_failure) + job->dependency_failure(job, dep, cancel); }); - finish(job, LoadStatus::CANCELED, e, lock); - break; // This job is now finished, stop its dependencies processing + if (cancel) + { + finish(job, LoadStatus::CANCELED, cancel, lock); + break; // This job is now finished, stop its dependencies processing + } } } } @@ -515,63 +533,76 @@ String AsyncLoader::checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJo return {}; } -void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock & lock) +void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr reason, std::unique_lock & lock) { chassert(scheduled_jobs.contains(job)); // Job was pending + + // Notify waiters size_t resumed_workers = 0; // Number of workers resumed in the execution pool of the job if (status == LoadStatus::OK) - { - // Notify waiters - resumed_workers += job->ok(); + resumed_workers = job->ok(); + else if (status == LoadStatus::FAILED) + resumed_workers = job->failed(reason); + else if (status == LoadStatus::CANCELED) + resumed_workers = job->canceled(reason); - // Update dependent jobs and enqueue if ready - for (const auto & dep : scheduled_jobs[job].dependent_jobs) + // Adjust suspended workers count + if (resumed_workers) + { + Pool & pool = pools[job->executionPool()]; + pool.suspended_workers -= resumed_workers; + } + + Info & info = scheduled_jobs[job]; + if (info.isReady()) + { + // Job could be in ready queue (on cancel) -- must be dequeued + pools[job->pool_id].ready_queue.erase(info.ready_seqno); + info.ready_seqno = 0; + } + + // To avoid container modification during recursion (during clean dependency graph edges below) + LoadJobSet dependent; + dependent.swap(info.dependent_jobs); + + // Update dependent jobs + for (const auto & dpt : dependent) + { + if (auto dpt_info = scheduled_jobs.find(dpt); dpt_info != scheduled_jobs.end()) { - chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending - Info & dep_info = scheduled_jobs[dep]; - dep_info.dependencies_left--; - if (!dep_info.isBlocked()) - enqueue(dep_info, dep, lock); + dpt_info->second.dependencies_left--; + if (!dpt_info->second.isBlocked()) + enqueue(dpt_info->second, dpt, lock); + + if (status != LoadStatus::OK) + { + std::exception_ptr cancel; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + if (dpt->dependency_failure) + dpt->dependency_failure(dpt, job, cancel); + }); + // Recurse into dependent job if it should be canceled + if (cancel) + finish(dpt, LoadStatus::CANCELED, cancel, lock); + } + } + else + { + // Job has already been canceled. Do not enter twice into the same job during finish recursion. + // This happens in {A<-B; A<-C; B<-D; C<-D} graph for D if A is failed or canceled. 
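The comment above refers to the diamond-shaped graph {A<-B; A<-C; B<-D; C<-D}: when A fails, the cancellation recursion reaches D both through B and through C, so the second visit must find D already removed from the scheduled set and do nothing. A simplified, self-contained sketch of that guard (hypothetical container names, no locking, pools, or statuses):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

std::map<std::string, std::vector<std::string>> dependents = {
    {"A", {"B", "C"}}, {"B", {"D"}}, {"C", {"D"}}, {"D", {}}};

std::set<std::string> scheduled = {"A", "B", "C", "D"};

void cancel(const std::string & job)
{
    scheduled.erase(job);
    std::cout << "canceled " << job << '\n';
    for (const auto & dpt : dependents[job])
    {
        if (!scheduled.count(dpt))
            continue;  /// already canceled on another path -- do not enter twice
        cancel(dpt);
    }
}

int main()
{
    cancel("A");  /// prints each of A, B, D, C exactly once
}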
+ chassert(status == LoadStatus::CANCELED); } } - else + + // Clean dependency graph edges pointing to canceled jobs + if (status != LoadStatus::OK) { - // Notify waiters - if (status == LoadStatus::FAILED) - resumed_workers += job->failed(exception_from_job); - else if (status == LoadStatus::CANCELED) - resumed_workers += job->canceled(exception_from_job); - - Info & info = scheduled_jobs[job]; - if (info.isReady()) - { - pools[job->pool_id].ready_queue.erase(info.ready_seqno); - info.ready_seqno = 0; - } - - // Recurse into all dependent jobs - LoadJobSet dependent; - dependent.swap(info.dependent_jobs); // To avoid container modification during recursion - for (const auto & dep : dependent) - { - if (!scheduled_jobs.contains(dep)) - continue; // Job has already been canceled - std::exception_ptr e; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr( - Exception(ErrorCodes::ASYNC_LOAD_CANCELED, - "Load job '{}' -> {}", - dep->name, - getExceptionMessage(exception_from_job, /* with_stacktrace = */ false))); - }); - finish(dep, LoadStatus::CANCELED, e, lock); - } - - // Clean dependency graph edges pointing to canceled jobs for (const auto & dep : job->dependencies) + { if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) dep_info->second.dependent_jobs.erase(job); + } } // Job became finished @@ -582,12 +613,6 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti if (log_progress) logAboutProgress(log, finished_jobs.size() - old_jobs, finished_jobs.size() + scheduled_jobs.size() - old_jobs, stopwatch); }); - - if (resumed_workers) - { - Pool & pool = pools[job->executionPool()]; - pool.suspended_workers -= resumed_workers; - } } void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock & lock) @@ -612,6 +637,9 @@ void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::un } job->pool_id.store(new_pool_id); + // TODO(serxa): we should adjust suspended_workers and suspended_waiters here. + // Otherwise suspended_workers we be left inconsistent. Fix it and add a test. + // Scenario: schedule a job A, wait for it from a job B in the same pool, prioritize A // Recurse into dependencies for (const auto & dep : job->dependencies) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 95a2273a0f4..85de309b153 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -57,12 +58,13 @@ enum class LoadStatus class LoadJob : private boost::noncopyable { public: - template - LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, Func && func_) + template + LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, DFFunc && dependency_failure_, Func && func_) : dependencies(std::forward(dependencies_)) , name(std::move(name_)) , execution_pool_id(pool_id_) , pool_id(pool_id_) + , dependency_failure(std::forward(dependency_failure_)) , func(std::forward(func_)) {} @@ -108,6 +110,14 @@ private: std::atomic job_id{0}; std::atomic execution_pool_id; std::atomic pool_id; + + // Handler for failed or canceled dependencies. + // If job needs to be canceled on `dependency` failure, then function should set `cancel` to a specific reason. + // Note that implementation should be fast and cannot use AsyncLoader, because it is called under `AsyncLoader::mutex`. + // Note that `dependency_failure` is called only on pending jobs. 
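The dependency_failure handler documented above is what makes cancellation behaviour configurable: cancelOnDependencyFailure (the default) cancels the dependent job, while ignoreDependencyFailure lets it run anyway. Below is a simplified, self-contained sketch of that contract; Job, JobPtr, and onDependencyFailure are hypothetical stand-ins for LoadJob and the corresponding AsyncLoader logic, kept only to show how setting `cancel` drives the decision.

#include <exception>
#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

struct Job;
using JobPtr = std::shared_ptr<Job>;

struct Job
{
    std::string name;
    /// Same shape as LoadJob::dependency_failure: set `cancel` to cancel `self`.
    std::function<void(const JobPtr & self, const JobPtr & dependency, std::exception_ptr & cancel)> dependency_failure;
    bool canceled = false;
};

void cancelOnDependencyFailure(const JobPtr & self, const JobPtr & dependency, std::exception_ptr & cancel)
{
    cancel = std::make_exception_ptr(std::runtime_error("Load job '" + self->name + "' -> dependency '" + dependency->name + "' failed"));
}

void ignoreDependencyFailure(const JobPtr &, const JobPtr &, std::exception_ptr &) {}

/// Simplified analogue of what happens when a dependency of a still-pending job fails.
void onDependencyFailure(const JobPtr & dependent, const JobPtr & failed_dep)
{
    std::exception_ptr cancel;
    if (dependent->dependency_failure)
        dependent->dependency_failure(dependent, failed_dep, cancel);
    if (cancel)
        dependent->canceled = true;  /// in AsyncLoader: finish(dependent, LoadStatus::CANCELED, cancel, lock)
}

int main()
{
    auto parent = std::make_shared<Job>(Job{"load metadata", {}});
    auto strict = std::make_shared<Job>(Job{"load table", cancelOnDependencyFailure});
    auto lenient = std::make_shared<Job>(Job{"best-effort warmup", ignoreDependencyFailure});

    onDependencyFailure(strict, parent);
    onDependencyFailure(lenient, parent);

    std::cout << "strict canceled: " << strict->canceled << ", lenient canceled: " << lenient->canceled << '\n';
}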
+ std::function dependency_failure; + + // Function to be called to execute the job. std::function func; mutable std::mutex mutex; @@ -123,35 +133,54 @@ private: std::atomic finish_time{TimePoint{}}; }; -struct EmptyJobFunc -{ - void operator()(AsyncLoader &, const LoadJobPtr &) {} -}; +// For LoadJob::dependency_failure. Cancels the job on the first dependency failure or cancel. +void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel); -template -LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, Func && func = EmptyJobFunc()) +// For LoadJob::dependency_failure. Never cancels the job due to dependency failure or cancel. +void ignoreDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel); + +template concept LoadJobDependencyFailure = std::invocable; +template concept LoadJobFunc = std::invocable; + +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) { - return std::make_shared(std::move(dependencies), std::move(name), 0, std::forward(func)); + return std::make_shared(std::move(dependencies), std::move(name), 0, std::forward(dependency_failure), std::forward(func)); } -template -LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, Func && func = EmptyJobFunc()) +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) { - return std::make_shared(dependencies, std::move(name), 0, std::forward(func)); + return std::make_shared(dependencies, std::move(name), 0, std::forward(dependency_failure), std::forward(func)); } -template -LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc()) +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) { - return std::make_shared(std::move(dependencies), std::move(name), pool_id, std::forward(func)); + return std::make_shared(std::move(dependencies), std::move(name), pool_id, std::forward(dependency_failure), std::forward(func)); } -template -LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc()) +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) { - return std::make_shared(dependencies, std::move(name), pool_id, std::forward(func)); + return std::make_shared(dependencies, std::move(name), pool_id, std::forward(dependency_failure), std::forward(func)); } +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobFunc auto && func) +{ + return std::make_shared(std::move(dependencies), std::move(name), 0, cancelOnDependencyFailure, std::forward(func)); +} + +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, LoadJobFunc auto && func) +{ + return std::make_shared(dependencies, std::move(name), 0, cancelOnDependencyFailure, std::forward(func)); +} + +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobFunc auto && func) +{ + return std::make_shared(std::move(dependencies), std::move(name), pool_id, cancelOnDependencyFailure, std::forward(func)); +} + +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobFunc auto && 
func) +{ + return std::make_shared(dependencies, std::move(name), pool_id, cancelOnDependencyFailure, std::forward(func)); +} // Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set). class LoadTask : private boost::noncopyable @@ -277,7 +306,7 @@ private: { size_t dependencies_left = 0; // Current number of dependencies on pending jobs. UInt64 ready_seqno = 0; // Zero means that job is not in ready queue. - LoadJobSet dependent_jobs; // Set of jobs dependent on this job. + LoadJobSet dependent_jobs; // Set of jobs dependent on this job. Contains only scheduled jobs. // Three independent states of a scheduled job. bool isBlocked() const { return dependencies_left > 0; } @@ -371,7 +400,7 @@ public: private: void checkCycle(const LoadJobSet & jobs, std::unique_lock & lock); String checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock & lock); - void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock & lock); + void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr reason, std::unique_lock & lock); void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock & lock); void prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock & lock); void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock & lock); diff --git a/src/Common/CacheBase.h b/src/Common/CacheBase.h index 1cbfcc2165a..a809136f451 100644 --- a/src/Common/CacheBase.h +++ b/src/Common/CacheBase.h @@ -5,15 +5,15 @@ #include #include +#include +#include + #include -#include -#include #include #include +#include #include -#include - namespace DB { @@ -227,10 +227,10 @@ public: cache_policy->setMaxSizeInBytes(max_size_in_bytes); } - void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) + void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) { std::lock_guard lock(mutex); - cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries); + cache_policy->setQuotaForUser(user_id, max_size_in_bytes, max_entries); } virtual ~CacheBase() = default; diff --git a/src/Common/ConcurrencyControl.h b/src/Common/ConcurrencyControl.h index ba87e3b3a0c..7e20384aa2a 100644 --- a/src/Common/ConcurrencyControl.h +++ b/src/Common/ConcurrencyControl.h @@ -1,10 +1,11 @@ #pragma once +#include +#include +#include +#include #include #include -#include -#include -#include namespace DB diff --git a/src/Common/CpuId.h b/src/Common/CpuId.h index 1e54ccf62b3..1d15867289d 100644 --- a/src/Common/CpuId.h +++ b/src/Common/CpuId.h @@ -93,7 +93,10 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT OP(CLFLUSHOPT) \ OP(CLWB) \ OP(XSAVE) \ - OP(OSXSAVE) + OP(OSXSAVE) \ + OP(AMXBF16) \ + OP(AMXTILE) \ + OP(AMXINT8) union CpuInfo { @@ -313,6 +316,35 @@ bool haveRDRAND() noexcept return CpuInfo(0x0).registers.eax >= 0x7 && ((CpuInfo(0x1).registers.ecx >> 30) & 1u); } +inline bool haveAMX() noexcept +{ +#if defined(__x86_64__) || defined(__i386__) + // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf + return haveOSXSAVE() // implies haveXSAVE() + && ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS +#else + return false; +#endif +} + +bool haveAMXBF16() noexcept +{ + return haveAMX() + && ((CpuInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit +} + +bool haveAMXTILE() noexcept +{ 
+ return haveAMX() + && ((CpuInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit +} + +bool haveAMXINT8() noexcept +{ + return haveAMX() + && ((CpuInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit +} + struct CpuFlagsCache { #define DEF_NAME(X) static inline bool have_##X = have##X(); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 2613e9ec116..724b6ab62f7 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -242,7 +242,7 @@ M(FilesystemCacheDelayedCleanupElements, "Filesystem cache elements in background cleanup queue") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segment which are currently hold as unreleasable") \ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ - M(S3Requests, "S3 requests") \ + M(S3Requests, "S3 requests count") \ M(KeeperAliveConnections, "Number of alive connections") \ M(KeeperOutstandingRequets, "Number of outstanding requests") \ M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \ diff --git a/src/Common/FileRenamer.cpp b/src/Common/FileRenamer.cpp index da3f9e62d16..b43b870b94e 100644 --- a/src/Common/FileRenamer.cpp +++ b/src/Common/FileRenamer.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -11,15 +12,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace fs = std::filesystem; namespace DB diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index bc00e047a88..d4c6f89ff92 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -9,7 +9,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::function GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const +GetPriorityForLoadBalancing::Func +GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const { std::function get_priority; switch (load_balance) @@ -33,19 +34,26 @@ std::function GetPriorityForLoadBalancing::getPriorityFu get_priority = [offset](size_t i) { return i != offset ? Priority{1} : Priority{0}; }; break; case LoadBalancing::ROUND_ROBIN: - if (last_used >= pool_size) - last_used = 0; + auto local_last_used = last_used % pool_size; ++last_used; - /* Consider pool_size equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 4 0 1 2 3 - * last_used = 3 -> get_priority: 4 3 0 1 2 - * ... - * */ - get_priority = [this, pool_size](size_t i) + + // Example: pool_size = 5 + // | local_last_used | i=0 | i=1 | i=2 | i=3 | i=4 | + // | 0 | 4 | 0 | 1 | 2 | 3 | + // | 1 | 3 | 4 | 0 | 1 | 2 | + // | 2 | 2 | 3 | 4 | 0 | 1 | + // | 3 | 1 | 2 | 3 | 4 | 0 | + // | 4 | 0 | 1 | 2 | 3 | 4 | + + get_priority = [pool_size, local_last_used](size_t i) { - ++i; // To make `i` indexing start with 1 instead of 0 as `last_used` does - return Priority{static_cast(i < last_used ? 
pool_size - i : i - last_used)}; + size_t priority = pool_size - 1; + if (i < local_last_used) + priority = pool_size - 1 - (local_last_used - i); + if (i > local_last_used) + priority = i - local_last_used - 1; + + return Priority{static_cast(priority)}; }; break; } diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index c60d180eca0..0de99730977 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -8,7 +8,12 @@ namespace DB class GetPriorityForLoadBalancing { public: - explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {} + using Func = std::function; + + explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_, size_t last_used_ = 0) + : load_balancing(load_balancing_), last_used(last_used_) + { + } GetPriorityForLoadBalancing() = default; bool operator == (const GetPriorityForLoadBalancing & other) const @@ -23,7 +28,7 @@ public: return !(*this == other); } - std::function getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; + Func getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; std::vector hostname_prefix_distance; /// Prefix distances from name of this host to the names of hosts of pools. std::vector hostname_levenshtein_distance; /// Levenshtein Distances from name of this host to the names of hosts of pools. diff --git a/src/Common/HTTPHeaderFilter.cpp b/src/Common/HTTPHeaderFilter.cpp index e66f5212bab..9ad8dd6fccf 100644 --- a/src/Common/HTTPHeaderFilter.cpp +++ b/src/Common/HTTPHeaderFilter.cpp @@ -1,15 +1,7 @@ #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include namespace DB { diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h index 189af4db19b..8aa75d1d81f 100644 --- a/src/Common/ICachePolicy.h +++ b/src/Common/ICachePolicy.h @@ -2,10 +2,11 @@ #include #include +#include #include #include -#include +#include namespace DB { @@ -43,7 +44,7 @@ public: virtual void setMaxCount(size_t /*max_count*/) = 0; virtual void setMaxSizeInBytes(size_t /*max_size_in_bytes*/) = 0; - virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); } + virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_id, max_size_in_bytes, max_entries); } /// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also /// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key. diff --git a/src/Common/ICachePolicyUserQuota.h b/src/Common/ICachePolicyUserQuota.h index 717cb916f85..6fa4f7947cb 100644 --- a/src/Common/ICachePolicyUserQuota.h +++ b/src/Common/ICachePolicyUserQuota.h @@ -1,5 +1,6 @@ #pragma once +#include #include namespace DB @@ -15,14 +16,14 @@ class ICachePolicyUserQuota { public: /// Register or update the user's quota for the given resource. 
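A quick way to check that the branch-based formula in the round-robin case above reproduces the table in its comment: the standalone sketch below (the helper name roundRobinPriority is a stand-in) prints the priority of every replica index for pool_size = 5 and each value of local_last_used.

#include <cstddef>
#include <cstdio>

/// Mirrors the lambda body from GetPriorityForLoadBalancing::getPriorityFunc above.
static std::size_t roundRobinPriority(std::size_t pool_size, std::size_t local_last_used, std::size_t i)
{
    std::size_t priority = pool_size - 1;
    if (i < local_last_used)
        priority = pool_size - 1 - (local_last_used - i);
    if (i > local_last_used)
        priority = i - local_last_used - 1;
    return priority;
}

int main()
{
    const std::size_t pool_size = 5;
    for (std::size_t local_last_used = 0; local_last_used < pool_size; ++local_last_used)
    {
        std::printf("local_last_used=%zu:", local_last_used);
        for (std::size_t i = 0; i < pool_size; ++i)
            std::printf(" %zu", roundRobinPriority(pool_size, local_last_used, i));
        std::printf("\n");  /// e.g. local_last_used=1: 3 4 0 1 2, matching the table above
    }
}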
- virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) = 0; + virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) = 0; /// Update the actual resource usage for the given user. - virtual void increaseActual(const String & user_name, size_t entry_size_in_bytes) = 0; - virtual void decreaseActual(const String & user_name, size_t entry_size_in_bytes) = 0; + virtual void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0; + virtual void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0; /// Is the user allowed to write a new entry into the cache? - virtual bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const = 0; + virtual bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const = 0; virtual ~ICachePolicyUserQuota() = default; }; @@ -33,10 +34,10 @@ using CachePolicyUserQuotaPtr = std::unique_ptr; class NoCachePolicyUserQuota : public ICachePolicyUserQuota { public: - void setQuotaForUser(const String & /*user_name*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {} - void increaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {} - void decreaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {} - bool approveWrite(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) const override { return true; } + void setQuotaForUser(const UUID & /*user_id*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {} + void increaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {} + void decreaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {} + bool approveWrite(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) const override { return true; } }; diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp new file mode 100644 index 00000000000..a8b9d70e731 --- /dev/null +++ b/src/Common/Jemalloc.cpp @@ -0,0 +1,88 @@ +#include + +#if USE_JEMALLOC + +#include +#include +#include + +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +namespace ProfileEvents +{ + extern const Event MemoryAllocatorPurge; + extern const Event MemoryAllocatorPurgeTimeMicroseconds; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +void purgeJemallocArenas() +{ + LOG_TRACE(&Poco::Logger::get("SystemJemalloc"), "Purging unused memory"); + Stopwatch watch; + mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds()); +} + +void checkJemallocProfilingEnabled() +{ + bool active = true; + size_t active_size = sizeof(active); + mallctl("opt.prof", &active, &active_size, nullptr, 0); + + if (!active) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "ClickHouse was started without enabling profiling for jemalloc. To use jemalloc's profiler, following env variable should be " + "set: MALLOC_CONF=background_thread:true,prof:true"); +} + +void setJemallocProfileActive(bool value) +{ + checkJemallocProfilingEnabled(); + bool active = true; + size_t active_size = sizeof(active); + mallctl("prof.active", &active, &active_size, nullptr, 0); + if (active == value) + { + LOG_TRACE(&Poco::Logger::get("SystemJemalloc"), "Profiling is already {}", active ? 
"enabled" : "disabled"); + return; + } + + mallctl("prof.active", nullptr, nullptr, &value, sizeof(bool)); + LOG_TRACE(&Poco::Logger::get("SystemJemalloc"), "Profiling is {}", value ? "enabled" : "disabled"); +} + +std::string flushJemallocProfile(const std::string & file_prefix) +{ + checkJemallocProfilingEnabled(); + char * prefix_buffer; + size_t prefix_size = sizeof(prefix_buffer); + int n = mallctl("opt.prof_prefix", &prefix_buffer, &prefix_size, nullptr, 0); + if (!n && std::string_view(prefix_buffer) != "jeprof") + { + LOG_TRACE(&Poco::Logger::get("SystemJemalloc"), "Flushing memory profile with prefix {}", prefix_buffer); + mallctl("prof.dump", nullptr, nullptr, nullptr, 0); + return prefix_buffer; + } + + static std::atomic profile_counter{0}; + std::string profile_dump_path = fmt::format("{}.{}.{}.heap", file_prefix, getpid(), profile_counter.fetch_add(1)); + const auto * profile_dump_path_str = profile_dump_path.c_str(); + + LOG_TRACE(&Poco::Logger::get("SystemJemalloc"), "Flushing memory profile to {}", profile_dump_path_str); + mallctl("prof.dump", nullptr, nullptr, &profile_dump_path_str, sizeof(profile_dump_path_str)); + return profile_dump_path; +} + +} + +#endif diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h new file mode 100644 index 00000000000..80ff0f1a319 --- /dev/null +++ b/src/Common/Jemalloc.h @@ -0,0 +1,22 @@ +#pragma once + +#include "config.h" + +#if USE_JEMALLOC + +#include + +namespace DB +{ + +void purgeJemallocArenas(); + +void checkJemallocProfilingEnabled(); + +void setJemallocProfileActive(bool value); + +std::string flushJemallocProfile(const std::string & file_prefix); + +} + +#endif diff --git a/src/Common/MatchGenerator.cpp b/src/Common/MatchGenerator.cpp new file mode 100644 index 00000000000..f047c21b470 --- /dev/null +++ b/src/Common/MatchGenerator.cpp @@ -0,0 +1,494 @@ +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +# pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +# pragma clang diagnostic ignored "-Wnested-anon-types" +# pragma clang diagnostic ignored "-Wunused-parameter" +# pragma clang diagnostic ignored "-Wshadow-field-in-constructor" +# pragma clang diagnostic ignored "-Wdtor-name" +#endif +#include +#include +#include +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +#ifdef LOG_INFO +#undef LOG_INFO +#undef LOG_WARNING +#undef LOG_ERROR +#undef LOG_FATAL +#endif + +#include "MatchGenerator.h" + +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +} +} + + +namespace re2 +{ + +class RandomStringPrepareWalker : public Regexp::Walker +{ +private: + static constexpr int ImplicitMax = 100; + + using Children = std::vector; + + class Generators; + + /// This function objects look much prettier than lambda expression when stack traces are printed + class NodeFunction + { + public: + virtual size_t operator() (char * out, size_t size) = 0; + virtual size_t getRequiredSize() = 0; + virtual ~NodeFunction() = default; + }; + + using NodeFunctionPtr = std::shared_ptr; + using NodeFuncs = std::vector; + + static NodeFuncs getFuncs(const Children & children_, const Generators & generators_) + { + NodeFuncs result; + result.reserve(children_.size()); + + for (auto * child: children_) + { + result.push_back(generators_.at(child)); + } + + return result; + } + + class Generators: public std::map {}; + + class RegexpConcatFunction : public 
NodeFunction + { + public: + RegexpConcatFunction(const Children & children_, const Generators & generators_) + : children(getFuncs(children_, generators_)) + { + } + + size_t operator () (char * out, size_t size) override + { + size_t total_size = 0; + + for (auto & child: children) + { + size_t consumed = child->operator()(out, size); + chassert(consumed <= size); + out += consumed; + size -= consumed; + total_size += consumed; + } + + return total_size; + } + + size_t getRequiredSize() override + { + size_t total_size = 0; + for (auto & child: children) + total_size += child->getRequiredSize(); + return total_size; + } + + private: + NodeFuncs children; + }; + + class RegexpAlternateFunction : public NodeFunction + { + public: + RegexpAlternateFunction(const Children & children_, const Generators & generators_) + : children(getFuncs(children_, generators_)) + { + } + + size_t operator () (char * out, size_t size) override + { + std::uniform_int_distribution distribution(0, static_cast(children.size()-1)); + int chosen = distribution(thread_local_rng); + size_t consumed = children[chosen]->operator()(out, size); + chassert(consumed <= size); + return consumed; + } + + size_t getRequiredSize() override + { + size_t total_size = 0; + for (auto & child: children) + total_size = std::max(total_size, child->getRequiredSize()); + return total_size; + } + + private: + NodeFuncs children; + }; + + class RegexpRepeatFunction : public NodeFunction + { + public: + RegexpRepeatFunction(Regexp * re_, const Generators & generators_, int min_repeat_, int max_repeat_) + : func(generators_.at(re_)) + , min_repeat(min_repeat_) + , max_repeat(max_repeat_) + { + } + + size_t operator () (char * out, size_t size) override + { + std::uniform_int_distribution distribution(min_repeat, max_repeat); + int ntimes = distribution(thread_local_rng); + + size_t total_size = 0; + for (int i = 0; i < ntimes; ++i) + { + size_t consumed =func->operator()(out, size); + chassert(consumed <= size); + out += consumed; + size -= consumed; + total_size += consumed; + } + return total_size; + } + + size_t getRequiredSize() override + { + return max_repeat * func->getRequiredSize(); + } + + private: + NodeFunctionPtr func; + int min_repeat = 0; + int max_repeat = 0; + }; + + class RegexpCharClassFunction : public NodeFunction + { + using CharRanges = std::vector>; + + public: + explicit RegexpCharClassFunction(Regexp * re_) + { + CharClass * cc = re_->cc(); + chassert(cc); + if (cc->empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "kRegexpCharClass is empty"); + + char_count = cc->size(); + char_ranges.reserve(std::distance(cc->begin(), cc->end())); + + for (const auto range: *cc) + { + char_ranges.emplace_back(range.lo, range.hi); + } + } + + size_t operator () (char * out, size_t size) override + { + chassert(UTFmax <= size); + + std::uniform_int_distribution distribution(1, char_count); + int chosen = distribution(thread_local_rng); + int count_down = chosen; + + auto it = char_ranges.begin(); + for (; it != char_ranges.end(); ++it) + { + auto [lo, hi] = *it; + auto range_len = hi - lo + 1; + if (count_down <= range_len) + break; + count_down -= range_len; + } + + if (it == char_ranges.end()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, + "Unable to choose the rune. 
Runes {}, ranges {}, chosen {}", + char_count, char_ranges.size(), chosen); + + auto [lo, _] = *it; + Rune r = lo + count_down - 1; + return re2::runetochar(out, &r); + } + + size_t getRequiredSize() override + { + return UTFmax; + } + + private: + int char_count = 0; + CharRanges char_ranges; + }; + + class RegexpLiteralStringFunction : public NodeFunction + { + public: + explicit RegexpLiteralStringFunction(Regexp * re_) + { + if (re_->nrunes() == 0) + return; + + char buffer[UTFmax]; + for (int i = 0; i < re_->nrunes(); ++i) + { + int n = re2::runetochar(buffer, &re_->runes()[i]); + literal_string += String(buffer, n); + } + } + + size_t operator () (char * out, size_t size) override + { + chassert(literal_string.size() <= size); + + memcpy(out, literal_string.data(), literal_string.size()); + return literal_string.size(); + } + + size_t getRequiredSize() override + { + return literal_string.size(); + } + + private: + String literal_string; + }; + + class RegexpLiteralFunction : public NodeFunction + { + public: + explicit RegexpLiteralFunction(Regexp * re_) + { + char buffer[UTFmax]; + + Rune r = re_->rune(); + int n = re2::runetochar(buffer, &r); + literal = String(buffer, n); + } + + size_t operator () (char * out, size_t size) override + { + chassert(literal.size() <= size); + + memcpy(out, literal.data(), literal.size()); + return literal.size(); + } + + size_t getRequiredSize() override + { + return literal.size(); + } + + private: + String literal; + }; + + class ThrowExceptionFunction : public NodeFunction + { + public: + explicit ThrowExceptionFunction(Regexp * re_) + : operation(magic_enum::enum_name(re_->op())) + { + } + + size_t operator () (char *, size_t) override + { + throw DB::Exception( + DB::ErrorCodes::BAD_ARGUMENTS, + "RandomStringPrepareWalker: regexp node '{}' is not supported for generating a random match", + operation); + } + + size_t getRequiredSize() override + { + return 0; + } + + private: + String operation; + }; + + +public: + std::function getGenerator() + { + if (root == nullptr) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "no root has been set"); + + if (generators.empty()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "no generators"); + + auto root_func = generators.at(root); + auto required_buffer_size = root_func->getRequiredSize(); + auto generator_func = [=] () + -> String + { + auto buffer = String(required_buffer_size, '\0'); + size_t size = root_func->operator()(buffer.data(), buffer.size()); + buffer.resize(size); + return buffer; + }; + + root = nullptr; + generators = {}; + + return std::move(generator_func); + } + +private: + Children CopyChildrenArgs(Regexp ** children, int nchild) + { + Children result; + result.reserve(nchild); + for (int i = 0; i < nchild; ++i) + result.push_back(Copy(children[i])); + return result; + } + + Regexp * ShortVisit(Regexp* /*re*/, Regexp * /*parent_arg*/) override + { + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "ShortVisit should not be called"); + } + + Regexp * PreVisit(Regexp * re, Regexp * parent_arg, bool* /*stop*/) override /*noexcept*/ + { + if (parent_arg == nullptr) + { + chassert(root == nullptr); + chassert(re != nullptr); + root = re; + } + + return re; + } + + Regexp * PostVisit(Regexp * re, Regexp * /*parent_arg*/, Regexp * pre_arg, + Regexp ** child_args, int nchild_args) override /*noexcept*/ + { + switch (re->op()) + { + case kRegexpConcat: // Matches concatenation of sub_[0..nsub-1]. 
+                generators[re] = std::make_shared<RegexpConcatFunction>(CopyChildrenArgs(child_args, nchild_args), generators);
+                break;
+            case kRegexpAlternate: // Matches union of sub_[0..nsub-1].
+                generators[re] = std::make_shared<RegexpAlternateFunction>(CopyChildrenArgs(child_args, nchild_args), generators);
+                break;
+            case kRegexpQuest: // Matches sub_[0] zero or one times.
+                chassert(nchild_args == 1);
+                generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 0, 1);
+                break;
+            case kRegexpStar: // Matches sub_[0] zero or more times.
+                chassert(nchild_args == 1);
+                generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 0, ImplicitMax);
+                break;
+            case kRegexpPlus: // Matches sub_[0] one or more times.
+                chassert(nchild_args == 1);
+                generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 1, ImplicitMax);
+                break;
+            case kRegexpCharClass: // Matches character class given by cc_.
+                chassert(nchild_args == 0);
+                generators[re] = std::make_shared<RegexpCharClassFunction>(re);
+                break;
+            case kRegexpLiteralString: // Matches runes_.
+                chassert(nchild_args == 0);
+                generators[re] = std::make_shared<RegexpLiteralStringFunction>(re);
+                break;
+            case kRegexpLiteral: // Matches rune_.
+                chassert(nchild_args == 0);
+                generators[re] = std::make_shared<RegexpLiteralFunction>(re);
+                break;
+            case kRegexpCapture: // Parenthesized (capturing) subexpression.
+                chassert(nchild_args == 1);
+                generators[re] = generators.at(child_args[0]);
+                break;
+
+            case kRegexpNoMatch: // Matches no strings.
+            case kRegexpEmptyMatch: // Matches empty string.
+            case kRegexpRepeat: // Matches sub_[0] at least min_ times, at most max_ times.
+            case kRegexpAnyChar: // Matches any character.
+            case kRegexpAnyByte: // Matches any byte [sic].
+            case kRegexpBeginLine: // Matches empty string at beginning of line.
+            case kRegexpEndLine: // Matches empty string at end of line.
+            case kRegexpWordBoundary: // Matches word boundary "\b".
+            case kRegexpNoWordBoundary: // Matches not-a-word boundary "\B".
+            case kRegexpBeginText: // Matches empty string at beginning of text.
+            case kRegexpEndText: // Matches empty string at end of text.
+ case kRegexpHaveMatch: // Forces match of entire expression + generators[re] = std::make_shared(re); + } + + return pre_arg; + } + + Regexp * root = nullptr; + Generators generators; +}; + +} + + +namespace DB +{ + +void RandomStringGeneratorByRegexp::RegexpPtrDeleter::operator() (re2::Regexp * re) const noexcept +{ + re->Decref(); +} + +RandomStringGeneratorByRegexp::RandomStringGeneratorByRegexp(const String & re_str) +{ + re2::RE2::Options options; + options.set_case_sensitive(true); + options.set_encoding(re2::RE2::Options::EncodingLatin1); + auto flags = static_cast(options.ParseFlags()); + + re2::RegexpStatus status; + regexp.reset(re2::Regexp::Parse(re_str, flags, &status)); + + if (!regexp) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, + "Error parsing regexp '{}': {}", + re_str, status.Text()); + + regexp.reset(regexp->Simplify()); + + auto walker = re2::RandomStringPrepareWalker(); + walker.Walk(regexp.get(), {}); + generatorFunc = walker.getGenerator(); + + { + auto test_check = generate(); + auto matched = RE2::FullMatch(test_check, re2::RE2(re_str)); + if (!matched) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, + "Generator is unable to produce random string for regexp '{}': {}", + re_str, test_check); + } +} + +String RandomStringGeneratorByRegexp::generate() const +{ + chassert(generatorFunc); + return generatorFunc(); +} + +} diff --git a/src/Common/MatchGenerator.h b/src/Common/MatchGenerator.h new file mode 100644 index 00000000000..68b22404d5a --- /dev/null +++ b/src/Common/MatchGenerator.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +namespace re2 +{ + class Regexp; +} + +namespace DB +{ + +class RandomStringGeneratorByRegexp +{ +public: + explicit RandomStringGeneratorByRegexp(const String & re_str); + String generate() const; + +private: + struct RegexpPtrDeleter + { + void operator()(re2::Regexp * re) const noexcept; + }; + using RegexpPtr = std::unique_ptr; + + RegexpPtr regexp; + std::function generatorFunc; +}; + +} diff --git a/src/Common/ObjectStorageKey.cpp b/src/Common/ObjectStorageKey.cpp index ca5617c8aa2..feda1d9ac29 100644 --- a/src/Common/ObjectStorageKey.cpp +++ b/src/Common/ObjectStorageKey.cpp @@ -65,4 +65,5 @@ ObjectStorageKey ObjectStorageKey::createAsAbsolute(String key_) object_key.is_relative = false; return object_key; } + } diff --git a/src/Common/ObjectStorageKeyGenerator.cpp b/src/Common/ObjectStorageKeyGenerator.cpp new file mode 100644 index 00000000000..7b4507a3abc --- /dev/null +++ b/src/Common/ObjectStorageKeyGenerator.cpp @@ -0,0 +1,94 @@ +#include "ObjectStorageKeyGenerator.h" + +#include +#include + +#include + + +class GeneratorWithTemplate : public DB::IObjectStorageKeysGenerator +{ +public: + explicit GeneratorWithTemplate(String key_template_) + : key_template(std::move(key_template_)) + , re_gen(key_template) + { + } + DB::ObjectStorageKey generate(const String &) const override + { + return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate()); + } + +private: + String key_template; + DB::RandomStringGeneratorByRegexp re_gen; +}; + + +class GeneratorWithPrefix : public DB::IObjectStorageKeysGenerator +{ +public: + explicit GeneratorWithPrefix(String key_prefix_) + : key_prefix(std::move(key_prefix_)) + {} + + DB::ObjectStorageKey generate(const String &) const override + { + /// Path to store the new S3 object. + + /// Total length is 32 a-z characters for enough randomness. 
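    /// (Aside, illustrative sketch rather than part of the patch: GeneratorWithTemplate above
    ///  drives the key shape with a regexp instead of plain random characters. The pattern below
    ///  is made up; any re2 pattern without anchors works the same way.
    ///
    ///      DB::RandomStringGeneratorByRegexp gen("prefix-[a-z]{3}/[0-9a-f]{29}");
    ///      String key1 = gen.generate();   /// always a full match of the pattern
    ///      String key2 = gen.generate();   /// a fresh random match on every call
    ///  )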
+ /// First 3 characters are used as a prefix for + /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/ + constexpr size_t key_name_total_size = 32; + constexpr size_t key_name_prefix_size = 3; + + /// Path to store new S3 object. + String key = fmt::format("{}/{}", + DB::getRandomASCIIString(key_name_prefix_size), + DB::getRandomASCIIString(key_name_total_size - key_name_prefix_size)); + + /// what ever key_prefix value is, consider that key as relative + return DB::ObjectStorageKey::createAsRelative(key_prefix, key); + } + +private: + String key_prefix; +}; + + +class GeneratorAsIsWithPrefix : public DB::IObjectStorageKeysGenerator +{ +public: + explicit GeneratorAsIsWithPrefix(String key_prefix_) + : key_prefix(std::move(key_prefix_)) + {} + + DB::ObjectStorageKey generate(const String & path) const override + { + return DB::ObjectStorageKey::createAsRelative(key_prefix, path); + } + +private: + String key_prefix; +}; + + +namespace DB +{ + +ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorAsIsWithPrefix(String key_prefix) +{ + return std::make_shared(std::move(key_prefix)); +} + +ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByPrefix(String key_prefix) +{ + return std::make_shared(std::move(key_prefix)); +} + +ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByTemplate(String key_template) +{ + return std::make_shared(std::move(key_template)); +} + +} diff --git a/src/Common/ObjectStorageKeyGenerator.h b/src/Common/ObjectStorageKeyGenerator.h new file mode 100644 index 00000000000..29f2a4a22c2 --- /dev/null +++ b/src/Common/ObjectStorageKeyGenerator.h @@ -0,0 +1,22 @@ +#pragma once + +#include "ObjectStorageKey.h" +#include + +namespace DB +{ + +class IObjectStorageKeysGenerator +{ +public: + virtual ObjectStorageKey generate(const String & path) const = 0; + virtual ~IObjectStorageKeysGenerator() = default; +}; + +using ObjectStorageKeysGeneratorPtr = std::shared_ptr; + +ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorAsIsWithPrefix(String key_prefix); +ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByPrefix(String key_prefix); +ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByTemplate(String key_template); + +} diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index b566ffc6045..8a580469278 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -484,7 +484,7 @@ OptimizedRegularExpression::OptimizedRegularExpression(const std::string & regex if (!is_trivial) { /// Compile the re2 regular expression. - typename re2::RE2::Options regexp_options; + re2::RE2::Options regexp_options; /// Never write error messages to stderr. It's ignorant to do it from library code. regexp_options.set_log_errors(false); diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index 4521b81dfe2..b3b4b9497b5 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -5,17 +5,9 @@ #include #include #include +#include #include "config.h" -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - /** Uses two ways to optimize a regular expression: * 1. 
If the regular expression is trivial (reduces to finding a substring in a string), * then replaces the search with strstr or strcasestr. diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 543a39fbc39..f960d551996 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -124,7 +124,9 @@ public: size_t max_ignored_errors, bool fallback_to_stale_replicas, const TryGetEntryFunc & try_get_entry, - const GetPriorityFunc & get_priority = GetPriorityFunc()); + const GetPriorityFunc & get_priority); + + size_t getPoolSize() const { return nested_pools.size(); } protected: @@ -147,7 +149,7 @@ protected: return std::make_tuple(shared_pool_states, nested_pools, last_error_decrease_time); } - NestedPools nested_pools; + const NestedPools nested_pools; const time_t decrease_error_period; const size_t max_error_cap; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 119e0d99143..8782f895f3f 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -391,6 +391,9 @@ The server successfully detected this situation and will download merged part fr M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \ M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \ \ + M(S3Clients, "Number of created S3 clients.") \ + M(TinyS3Clients, "Number of S3 clients copies which reuse an existing auth provider from another client.") \ + \ M(EngineFileLikeReadFiles, "Number of files read in table engines working with files (like File/S3/URL/HDFS).") \ \ M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \ @@ -599,6 +602,19 @@ The server successfully detected this situation and will download merged part fr M(LogError, "Number of log messages with level Error") \ M(LogFatal, "Number of log messages with level Fatal") \ \ + M(InterfaceHTTPSendBytes, "Number of bytes sent through HTTP interfaces") \ + M(InterfaceHTTPReceiveBytes, "Number of bytes received through HTTP interfaces") \ + M(InterfaceNativeSendBytes, "Number of bytes sent through native interfaces") \ + M(InterfaceNativeReceiveBytes, "Number of bytes received through native interfaces") \ + M(InterfacePrometheusSendBytes, "Number of bytes sent through Prometheus interfaces") \ + M(InterfacePrometheusReceiveBytes, "Number of bytes received through Prometheus interfaces") \ + M(InterfaceInterserverSendBytes, "Number of bytes sent through interserver interfaces") \ + M(InterfaceInterserverReceiveBytes, "Number of bytes received through interserver interfaces") \ + M(InterfaceMySQLSendBytes, "Number of bytes sent through MySQL interfaces") \ + M(InterfaceMySQLReceiveBytes, "Number of bytes received through MySQL interfaces") \ + M(InterfacePostgreSQLSendBytes, "Number of bytes sent through PostgreSQL interfaces") \ + M(InterfacePostgreSQLReceiveBytes, "Number of bytes received through PostgreSQL interfaces") \ + \ M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas") \ #ifdef APPLY_FOR_EXTERNAL_EVENTS diff --git a/src/Common/RemoteHostFilter.cpp b/src/Common/RemoteHostFilter.cpp index e4dd506b682..815be8902e6 100644 --- a/src/Common/RemoteHostFilter.cpp +++ b/src/Common/RemoteHostFilter.cpp @@ -3,17 +3,9 @@ #include #include #include +#include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { namespace 
ErrorCodes diff --git a/src/Common/SensitiveDataMasker.cpp b/src/Common/SensitiveDataMasker.cpp index 2b21c223bd8..7c9abf11fea 100644 --- a/src/Common/SensitiveDataMasker.cpp +++ b/src/Common/SensitiveDataMasker.cpp @@ -4,18 +4,10 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - #include #include +#include #include #include diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 21235914f7c..fe513199ac2 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -296,6 +296,9 @@ constexpr std::pair replacements[] // Replace parts from @c replacements with shorter aliases String demangleAndCollapseNames(std::string_view file, const char * const symbol_name) { + if (!symbol_name) + return "?"; + std::string_view file_copy = file; if (auto trim_pos = file.find_last_of('/'); trim_pos != file.npos) file_copy.remove_suffix(file.size() - trim_pos); diff --git a/src/Common/TTLCachePolicy.h b/src/Common/TTLCachePolicy.h index 98708c653c3..338cc543385 100644 --- a/src/Common/TTLCachePolicy.h +++ b/src/Common/TTLCachePolicy.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -11,37 +12,37 @@ namespace DB class PerUserTTLCachePolicyUserQuota : public ICachePolicyUserQuota { public: - void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) override + void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) override { - quotas[user_name] = {max_size_in_bytes, max_entries}; + quotas[user_id] = {max_size_in_bytes, max_entries}; } - void increaseActual(const String & user_name, size_t entry_size_in_bytes) override + void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) override { - auto & actual_for_user = actual[user_name]; + auto & actual_for_user = actual[user_id]; actual_for_user.size_in_bytes += entry_size_in_bytes; actual_for_user.num_items += 1; } - void decreaseActual(const String & user_name, size_t entry_size_in_bytes) override + void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) override { - chassert(actual.contains(user_name)); + chassert(actual.contains(user_id)); - chassert(actual[user_name].size_in_bytes >= entry_size_in_bytes); - actual[user_name].size_in_bytes -= entry_size_in_bytes; + chassert(actual[user_id].size_in_bytes >= entry_size_in_bytes); + actual[user_id].size_in_bytes -= entry_size_in_bytes; - chassert(actual[user_name].num_items >= 1); - actual[user_name].num_items -= 1; + chassert(actual[user_id].num_items >= 1); + actual[user_id].num_items -= 1; } - bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const override + bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const override { - auto it_actual = actual.find(user_name); + auto it_actual = actual.find(user_id); Resources actual_for_user{.size_in_bytes = 0, .num_items = 0}; /// assume zero actual resource consumption is user isn't found if (it_actual != actual.end()) actual_for_user = it_actual->second; - auto it_quota = quotas.find(user_name); + auto it_quota = quotas.find(user_id); Resources quota_for_user{.size_in_bytes = std::numeric_limits::max(), .num_items = std::numeric_limits::max()}; /// assume no threshold if no quota is found if (it_quota != quotas.end()) quota_for_user = it_quota->second; @@ -69,10 +70,10 @@ public: size_t num_items = 0; }; - /// user name 
--> cache size quota (in bytes) / number of items quota - std::map quotas; - /// user name --> actual cache usage (in bytes) / number of items - std::map actual; + /// user id --> cache size quota (in bytes) / number of items quota + std::map quotas; + /// user id --> actual cache usage (in bytes) / number of items + std::map actual; }; @@ -132,7 +133,8 @@ public: if (it == cache.end()) return; size_t sz = weight_function(*it->second); - Base::user_quotas->decreaseActual(it->first.user_name, sz); + if (it->first.user_id.has_value()) + Base::user_quotas->decreaseActual(*it->first.user_id, sz); cache.erase(it); size_in_bytes -= sz; } @@ -169,7 +171,9 @@ public: /// Checks against per-user limits auto sufficient_space_in_cache_for_user = [&]() { - return Base::user_quotas->approveWrite(key.user_name, entry_size_in_bytes); + if (key.user_id.has_value()) + return Base::user_quotas->approveWrite(*key.user_id, entry_size_in_bytes); + return true; }; if (!sufficient_space_in_cache() || !sufficient_space_in_cache_for_user()) @@ -179,7 +183,8 @@ public: if (is_stale_function(it->first)) { size_t sz = weight_function(*it->second); - Base::user_quotas->decreaseActual(it->first.user_name, sz); + if (it->first.user_id.has_value()) + Base::user_quotas->decreaseActual(*it->first.user_id, sz); it = cache.erase(it); size_in_bytes -= sz; } @@ -193,14 +198,16 @@ public: if (auto it = cache.find(key); it != cache.end()) { size_t sz = weight_function(*it->second); - Base::user_quotas->decreaseActual(it->first.user_name, sz); + if (it->first.user_id.has_value()) + Base::user_quotas->decreaseActual(*it->first.user_id, sz); cache.erase(it); // stupid bug: (*) doesn't replace existing entries (likely due to custom hash function), need to erase explicitly size_in_bytes -= sz; } cache[key] = std::move(mapped); // (*) size_in_bytes += entry_size_in_bytes; - Base::user_quotas->increaseActual(key.user_name, entry_size_in_bytes); + if (key.user_id.has_value()) + Base::user_quotas->increaseActual(*key.user_id, entry_size_in_bytes); } } diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index bf7de46ae6d..b115d3a8734 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -23,6 +23,12 @@ UInt32 getSupportedArchs() result |= static_cast(TargetArch::AVX512VBMI); if (Cpu::CpuFlagsCache::have_AVX512VBMI2) result |= static_cast(TargetArch::AVX512VBMI2); + if (Cpu::CpuFlagsCache::have_AMXBF16) + result |= static_cast(TargetArch::AMXBF16); + if (Cpu::CpuFlagsCache::have_AMXTILE) + result |= static_cast(TargetArch::AMXTILE); + if (Cpu::CpuFlagsCache::have_AMXINT8) + result |= static_cast(TargetArch::AMXINT8); return result; } @@ -44,6 +50,9 @@ String toString(TargetArch arch) case TargetArch::AVX512BW: return "avx512bw"; case TargetArch::AVX512VBMI: return "avx512vbmi"; case TargetArch::AVX512VBMI2: return "avx512vbmi2"; + case TargetArch::AMXBF16: return "amxbf16"; + case TargetArch::AMXTILE: return "amxtile"; + case TargetArch::AMXINT8: return "amxint8"; } UNREACHABLE(); diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index 68f6d39c3ff..e03036747f9 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -83,6 +83,9 @@ enum class TargetArch : UInt32 AVX512BW = (1 << 4), AVX512VBMI = (1 << 5), AVX512VBMI2 = (1 << 6), + AMXBF16 = (1 << 7), + AMXTILE = (1 << 8), + AMXINT8 = (1 << 9), }; /// Runtime detection. 
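/// Illustrative sketch, not part of the patch: the new AMX entries are queried the same way as
/// the existing AVX flags, by testing the bit mask built in getSupportedArchs() above. Variable
/// names here are made up for the example.
///
///     UInt32 supported = getSupportedArchs();
///     bool has_amx_int8 = supported & static_cast<UInt32>(TargetArch::AMXINT8);   /// bit (1 << 9) from the enum
///     bool has_amx_tile = supported & static_cast<UInt32>(TargetArch::AMXTILE);   /// bit (1 << 8)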
diff --git a/src/Common/config.h.in b/src/Common/config.h.in index f84e28942c5..5b3388a3b7d 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -28,6 +28,7 @@ #cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_FASTOPS #cmakedefine01 USE_SQIDS +#cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN #cmakedefine01 USE_LIBURING diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index 834111ec754..72e67619859 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,15 +7,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { namespace ErrorCodes diff --git a/src/Common/randomNumber.h b/src/Common/randomNumber.h new file mode 100644 index 00000000000..b795f32baca --- /dev/null +++ b/src/Common/randomNumber.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +inline UInt32 randomNumber() +{ + pcg64_fast rng{randomSeed()}; + std::uniform_int_distribution dist6( + std::numeric_limits::min(), std::numeric_limits::max()); + return static_cast(dist6(rng)); +} diff --git a/src/Common/re2.h b/src/Common/re2.h new file mode 100644 index 00000000000..c81b7157e91 --- /dev/null +++ b/src/Common/re2.h @@ -0,0 +1,11 @@ +#pragma once + + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif +#include +#ifdef __clang__ +# pragma clang diagnostic pop +#endif diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 5c54dedbbde..ea8485fee92 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -544,6 +546,99 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies) } } +TEST(AsyncLoader, IgnoreDependencyFailure) +{ + AsyncLoaderTest t; + std::atomic success{false}; + t.loader.start(); + + std::string_view error_message = "test job failure"; + + auto failed_job_func = [&] (AsyncLoader &, const LoadJobPtr &) { + throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message); + }; + auto dependent_job_func = [&] (AsyncLoader &, const LoadJobPtr &) { + success.store(true); + }; + + auto failed_job = makeLoadJob({}, "failed_job", failed_job_func); + auto dependent_job = makeLoadJob({failed_job}, + "dependent_job", ignoreDependencyFailure, dependent_job_func); + auto task = t.schedule({ failed_job, dependent_job }); + + t.loader.wait(); + + ASSERT_EQ(failed_job->status(), LoadStatus::FAILED); + ASSERT_EQ(dependent_job->status(), LoadStatus::OK); + ASSERT_EQ(success.load(), true); +} + +TEST(AsyncLoader, CustomDependencyFailure) +{ + AsyncLoaderTest t(16); + int error_count = 0; + std::atomic good_count{0}; + std::barrier canceled_sync(4); + t.loader.start(); + + std::string_view error_message = "test job failure"; + + auto evil_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) { + throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message); + }; + auto good_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) { + good_count++; + }; + auto late_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) { + canceled_sync.arrive_and_wait(); // wait for fail (A) before this job is finished + }; + auto collect_job_func = [&] (AsyncLoader &, const LoadJobPtr &) { + 
FAIL(); // job should be canceled, so we never get here + }; + auto dependent_job_func = [&] (AsyncLoader &, const LoadJobPtr &) { + FAIL(); // job should be canceled, so we never get here + }; + auto fail_after_two = [&] (const LoadJobPtr & self, const LoadJobPtr &, std::exception_ptr & cancel) { + if (++error_count == 2) + cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' canceled: too many dependencies have failed", + self->name)); + }; + + auto evil_dep1 = makeLoadJob({}, "evil_dep1", evil_dep_func); + auto evil_dep2 = makeLoadJob({}, "evil_dep2", evil_dep_func); + auto evil_dep3 = makeLoadJob({}, "evil_dep3", evil_dep_func); + auto good_dep1 = makeLoadJob({}, "good_dep1", good_dep_func); + auto good_dep2 = makeLoadJob({}, "good_dep2", good_dep_func); + auto good_dep3 = makeLoadJob({}, "good_dep3", good_dep_func); + auto late_dep1 = makeLoadJob({}, "late_dep1", late_dep_func); + auto late_dep2 = makeLoadJob({}, "late_dep2", late_dep_func); + auto late_dep3 = makeLoadJob({}, "late_dep3", late_dep_func); + auto collect_job = makeLoadJob({ + evil_dep1, evil_dep2, evil_dep3, + good_dep1, good_dep2, good_dep3, + late_dep1, late_dep2, late_dep3 + }, "collect_job", fail_after_two, collect_job_func); + auto dependent_job1 = makeLoadJob({ collect_job }, "dependent_job1", dependent_job_func); + auto dependent_job2 = makeLoadJob({ collect_job }, "dependent_job2", dependent_job_func); + auto dependent_job3 = makeLoadJob({ collect_job }, "dependent_job3", dependent_job_func); + auto task = t.schedule({ dependent_job1, dependent_job2, dependent_job3 }); // Other jobs should be discovery automatically + + t.loader.wait(collect_job, true); + canceled_sync.arrive_and_wait(); // (A) + + t.loader.wait(); + + ASSERT_EQ(late_dep1->status(), LoadStatus::OK); + ASSERT_EQ(late_dep2->status(), LoadStatus::OK); + ASSERT_EQ(late_dep3->status(), LoadStatus::OK); + ASSERT_EQ(collect_job->status(), LoadStatus::CANCELED); + ASSERT_EQ(dependent_job1->status(), LoadStatus::CANCELED); + ASSERT_EQ(dependent_job2->status(), LoadStatus::CANCELED); + ASSERT_EQ(dependent_job3->status(), LoadStatus::CANCELED); + ASSERT_EQ(good_count.load(), 3); +} + TEST(AsyncLoader, TestConcurrency) { AsyncLoaderTest t(10); diff --git a/src/Common/tests/gtest_generate_random_by_regexp.cpp b/src/Common/tests/gtest_generate_random_by_regexp.cpp new file mode 100644 index 00000000000..063257bdfd7 --- /dev/null +++ b/src/Common/tests/gtest_generate_random_by_regexp.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include + +#include + +void routine(String s) +{ + std::cerr << "case '"<< s << "'"; + auto gen = DB::RandomStringGeneratorByRegexp(s); + [[maybe_unused]] auto res = gen.generate(); + std::cerr << " result '"<< res << "'" << std::endl; +} + +TEST(GenerateRandomString, Positive) +{ + routine("."); + routine("[[:xdigit:]]"); + routine("[0-9a-f]"); + routine("[a-z]"); + routine("prefix-[0-9a-f]-suffix"); + routine("prefix-[a-z]-suffix"); + routine("[0-9a-f]{3}"); + routine("prefix-[0-9a-f]{3}-suffix"); + routine("prefix-[a-z]{3}-suffix/[0-9a-f]{20}"); + routine("left|right"); + routine("[a-z]{0,3}"); + routine("just constant string"); + routine("[a-z]?"); + routine("[a-z]*"); + routine("[a-z]+"); + routine("[^a-z]"); + routine("[[:lower:]]{3}/suffix"); + routine("prefix-(A|B|[0-9a-f]){3}"); + routine("mergetree/[a-z]{3}/[a-z]{29}"); +} + +TEST(GenerateRandomString, Negative) +{ + EXPECT_THROW(routine("[[:do_not_exists:]]"), DB::Exception); + EXPECT_THROW(routine("[:do_not_exis..."), 
DB::Exception); + EXPECT_THROW(routine("^abc"), DB::Exception); +} + +TEST(GenerateRandomString, DifferentResult) +{ + std::cerr << "100 different keys" << std::endl; + auto gen = DB::RandomStringGeneratorByRegexp("prefix-[a-z]{3}-suffix/[0-9a-f]{20}"); + std::set deduplicate; + for (int i = 0; i < 100; ++i) + ASSERT_TRUE(deduplicate.insert(gen.generate()).second); + std::cerr << "100 different keys: ok" << std::endl; +} + +TEST(GenerateRandomString, FullRange) +{ + std::cerr << "all possible letters" << std::endl; + auto gen = DB::RandomStringGeneratorByRegexp("[a-z]"); + std::set deduplicate; + int count = 'z' - 'a' + 1; + while (deduplicate.size() < count) + if (deduplicate.insert(gen.generate()).second) + std::cerr << " +1 "; + std::cerr << "all possible letters, ok" << std::endl; +} + +UInt64 elapsed(DB::ObjectStorageKeysGeneratorPtr generator) +{ + String path = "some_path"; + + Stopwatch watch; + + for (int i = 0; i < 100000; ++i) + { + [[ maybe_unused ]] auto result = generator->generate(path).serialize(); + } + + return watch.elapsedMicroseconds(); +} + +TEST(ObjectStorageKey, Performance) +{ + auto elapsed_old = elapsed(DB::createObjectStorageKeysGeneratorByPrefix( + "xx-xx-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/mergetree/")); + std::cerr << "old: " << elapsed_old << std::endl; + + auto elapsed_new = elapsed(DB::createObjectStorageKeysGeneratorByTemplate( + "xx-xx-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/mergetree/[a-z]{3}/[a-z]{29}")); + std::cerr << "new: " << elapsed_new << std::endl; + + if (elapsed_new > elapsed_old) + { + if (elapsed_new > elapsed_old) + std::cerr << "slow ratio: +" << float(elapsed_new) / elapsed_old << std::endl; + else + std::cerr << "fast ratio: " << float(elapsed_old) / elapsed_new << std::endl; + ASSERT_LT(elapsed_new, 1.5 * elapsed_old); + } + +} diff --git a/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp b/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp index 639fe38e56b..6e15c0f712c 100644 --- a/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp +++ b/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp @@ -1,15 +1,7 @@ #include +#include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - using namespace DB; diff --git a/src/Compression/CompressionCodecZSTD.cpp b/src/Compression/CompressionCodecZSTD.cpp index ec37ec6a7b5..7aecb652efc 100644 --- a/src/Compression/CompressionCodecZSTD.cpp +++ b/src/Compression/CompressionCodecZSTD.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -9,42 +9,11 @@ #include #include #include - +#include +#include namespace DB { - -class CompressionCodecZSTD : public ICompressionCodec -{ -public: - static constexpr auto ZSTD_DEFAULT_LEVEL = 1; - static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24; - - explicit CompressionCodecZSTD(int level_); - CompressionCodecZSTD(int level_, int window_log); - - uint8_t getMethodByte() const override; - - UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; - - void updateHash(SipHash & hash) const override; - -protected: - - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - - bool isCompression() const override { return true; } - bool isGenericCompression() const override { return true; } - -private: - const int level; - 
const bool enable_long_range; - const int window_log; -}; - - namespace ErrorCodes { extern const int CANNOT_COMPRESS; @@ -82,7 +51,7 @@ UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_s ZSTD_freeCCtx(cctx); if (ZSTD_isError(compressed_size)) - throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with ZSTD codec: {}", std::string(ZSTD_getErrorName(compressed_size))); + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with ZSTD codec: {}", ZSTD_getErrorName(compressed_size)); return static_cast(compressed_size); } @@ -96,13 +65,19 @@ void CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_s throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress ZSTD-encoded data: {}", std::string(ZSTD_getErrorName(res))); } -CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) : level(level_), enable_long_range(true), window_log(window_log_) +CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) + : level(level_) + , enable_long_range(true) + , window_log(window_log_) { setCodecDescription( "ZSTD", {std::make_shared(static_cast(level)), std::make_shared(static_cast(window_log))}); } -CompressionCodecZSTD::CompressionCodecZSTD(int level_) : level(level_), enable_long_range(false), window_log(0) +CompressionCodecZSTD::CompressionCodecZSTD(int level_) + : level(level_) + , enable_long_range(false) + , window_log(0) { setCodecDescription("ZSTD", {std::make_shared(static_cast(level))}); } diff --git a/src/Compression/CompressionCodecZSTD.h b/src/Compression/CompressionCodecZSTD.h new file mode 100644 index 00000000000..cdded9fc08a --- /dev/null +++ b/src/Compression/CompressionCodecZSTD.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +namespace DB +{ + +class CompressionCodecZSTD : public ICompressionCodec +{ +public: + static constexpr auto ZSTD_DEFAULT_LEVEL = 1; + static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24; + + explicit CompressionCodecZSTD(int level_); + CompressionCodecZSTD(int level_, int window_log); + + uint8_t getMethodByte() const override; + + UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; + + void updateHash(SipHash & hash) const override; + +protected: + + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; + + void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + + bool isCompression() const override { return true; } + bool isGenericCompression() const override { return true; } + +private: + const int level; + const bool enable_long_range; + const int window_log; +}; + +} diff --git a/src/Compression/CompressionCodecZSTDQAT.cpp b/src/Compression/CompressionCodecZSTDQAT.cpp new file mode 100644 index 00000000000..4828a71a515 --- /dev/null +++ b/src/Compression/CompressionCodecZSTDQAT.cpp @@ -0,0 +1,113 @@ +#ifdef ENABLE_ZSTD_QAT_CODEC + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; + extern const int ILLEGAL_CODEC_PARAMETER; +} + +/// Hardware-accelerated ZSTD. Supports only compression so far. 
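/// Illustrative usage, not part of the patch; the codec and setting names are taken from this diff,
/// while the table definition itself is made up:
///
///     SET enable_zstd_qat_codec = 1;
///     CREATE TABLE t (s String CODEC(ZSTD_QAT(3))) ENGINE = MergeTree ORDER BY tuple();
///
/// The level must stay within ZSTDQAT_SUPPORTED_MIN_LEVEL..ZSTDQAT_SUPPORTED_MAX_LEVEL (1..12),
/// and if the QAT device cannot be initialized the codec falls back to plain software ZSTD compression.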
+class CompressionCodecZSTDQAT : public CompressionCodecZSTD +{ +public: + static constexpr auto ZSTDQAT_SUPPORTED_MIN_LEVEL = 1; + static constexpr auto ZSTDQAT_SUPPORTED_MAX_LEVEL = 12; + static constexpr int ZSTDQAT_DEVICE_UNINITIALIZED = 0XFFFF; + + explicit CompressionCodecZSTDQAT(int level_); + +protected: + bool isZstdQat() const override { return true; } + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; + +private: + const int level; + Poco::Logger * log; + static std::atomic qat_state; /// Global initialization status of QAT device, we fall back back to software compression if uninitialized +}; + +std::atomic CompressionCodecZSTDQAT::qat_state = ZSTDQAT_DEVICE_UNINITIALIZED; + +UInt32 CompressionCodecZSTDQAT::doCompressData(const char * source, UInt32 source_size, char * dest) const +{ + if (qat_state == ZSTDQAT_DEVICE_UNINITIALIZED) + { + qat_state = QZSTD_startQatDevice(); + if (qat_state == QZSTD_OK) + LOG_DEBUG(log, "Initialization of hardware-assissted ZSTD_QAT codec successful"); + else + LOG_WARNING(log, "Initialization of hardware-assisted ZSTD_QAT codec failed, falling back to software ZSTD codec -> status: {}", qat_state); + } + + ZSTD_CCtx * cctx = ZSTD_createCCtx(); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level); + + void * sequence_producer_state = nullptr; + if (qat_state == QZSTD_OK) + { + sequence_producer_state = QZSTD_createSeqProdState(); + ZSTD_registerSequenceProducer(cctx, sequence_producer_state, qatSequenceProducer); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1); + } + + size_t compressed_size = ZSTD_compress2(cctx, dest, ZSTD_compressBound(source_size), source, source_size); + QZSTD_freeSeqProdState(sequence_producer_state); + ZSTD_freeCCtx(cctx); + + if (ZSTD_isError(compressed_size)) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with ZSTD_QAT codec: {}", ZSTD_getErrorName(compressed_size)); + + return static_cast(compressed_size); +} + +void registerCodecZSTDQAT(CompressionCodecFactory & factory) +{ + UInt8 method_code = static_cast(CompressionMethodByte::ZSTD_QPL); + factory.registerCompressionCodec("ZSTD_QAT", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr + { + int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL; + if (arguments && !arguments->children.empty()) + { + if (arguments->children.size() > 1) + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "ZSTD_QAT codec must have 1 parameter, given {}", arguments->children.size()); + + const auto children = arguments->children; + const auto * literal = children[0]->as(); + if (!literal) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "ZSTD_QAT codec argument must be integer"); + + level = static_cast(literal->value.safeGet()); + if (level < CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MIN_LEVEL || level > CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MAX_LEVEL) + /// that's a hardware limitation + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, + "ZSTDQAT codec doesn't support level more than {} and lower than {} , given {}", + CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MAX_LEVEL, CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MIN_LEVEL, level); + } + + return std::make_shared(level); + }); +} + +CompressionCodecZSTDQAT::CompressionCodecZSTDQAT(int level_) + : CompressionCodecZSTD(level_) + , level(level_) + , log(&Poco::Logger::get("CompressionCodecZSTDQAT")) +{ + setCodecDescription("ZSTD_QAT", {std::make_shared(static_cast(level))}); +} + +} + +#endif diff --git 
a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 7959c431328..f4413401667 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -167,6 +167,9 @@ void registerCodecNone(CompressionCodecFactory & factory); void registerCodecLZ4(CompressionCodecFactory & factory); void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecZSTD(CompressionCodecFactory & factory); +#ifdef ENABLE_ZSTD_QAT_CODEC +void registerCodecZSTDQAT(CompressionCodecFactory & factory); +#endif void registerCodecMultiple(CompressionCodecFactory & factory); #ifdef ENABLE_QPL_COMPRESSION void registerCodecDeflateQpl(CompressionCodecFactory & factory); @@ -189,6 +192,9 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecNone(*this); registerCodecLZ4(*this); registerCodecZSTD(*this); +#ifdef ENABLE_ZSTD_QAT_CODEC + registerCodecZSTDQAT(*this); +#endif registerCodecLZ4HC(*this); registerCodecMultiple(*this); #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index 4f2627587a3..e71476d564d 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,10 +40,10 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const; + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const; /// Validate codecs AST specified by user - void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const; + void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const; /// Get codec by AST and possible column_type. 
Some codecs can use /// information about type to improve inner settings, but every codec should diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index 98e9e7480da..f4d993f628e 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes void CompressionCodecFactory::validateCodec( - const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const + const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const { if (family_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec name cannot be empty"); @@ -43,13 +43,13 @@ void CompressionCodecFactory::validateCodec( { auto literal = std::make_shared(static_cast(*level)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)), - {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec); + {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); } else { auto identifier = std::make_shared(Poco::toUpper(family_name)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier), - {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec); + {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); } } @@ -77,7 +77,7 @@ bool innerDataTypeIsFloat(const DataTypePtr & type) } ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const { if (const auto * func = ast->as()) { @@ -165,6 +165,12 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( " You can enable it with the 'enable_deflate_qpl_codec' setting.", codec_family_name); + if (!enable_zstd_qat_codec && result_codec->isZstdQat()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Codec {} is disabled by default." + " You can enable it with the 'enable_zstd_qat_codec' setting.", + codec_family_name); + codecs_descriptions->children.emplace_back(result_codec->getCodecDesc()); } diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index 1b4025fed1d..ee4b3e38653 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -48,6 +48,7 @@ enum class CompressionMethodByte : uint8_t FPC = 0x98, DeflateQpl = 0x99, GCD = 0x9a, + ZSTD_QPL = 0x9b, }; } diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index ca794511268..18ff543d908 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -121,6 +121,9 @@ public: /// Is this the DEFLATE_QPL codec? virtual bool isDeflateQpl() const { return false; } + /// Is this the ZSTD_QAT codec? + virtual bool isZstdQat() const { return false; } + /// If it does nothing. 
virtual bool isNone() const { return false; } diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index cdd691f6a79..2436d730ae4 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -1,10 +1,11 @@ #include #include -#include #include #include #include +#include "config.h" + namespace DB { namespace ErrorCodes @@ -36,7 +37,11 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco } -const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl,ydld"; +const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = +#if USE_JEMALLOC +"jmst,jmfp,jmep,jmdp," +#endif +"conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl,ydld"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index be2c5ebd071..803c6eb594e 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -18,6 +18,11 @@ #include #include +#if USE_JEMALLOC +#include +#include +#endif + namespace { @@ -175,6 +180,20 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr yield_leadership_command = std::make_shared(keeper_dispatcher); factory.registerCommand(yield_leadership_command); +#if USE_JEMALLOC + FourLetterCommandPtr jemalloc_dump_stats = std::make_shared(keeper_dispatcher); + factory.registerCommand(jemalloc_dump_stats); + + FourLetterCommandPtr jemalloc_flush_profile = std::make_shared(keeper_dispatcher); + factory.registerCommand(jemalloc_flush_profile); + + FourLetterCommandPtr jemalloc_enable_profile = std::make_shared(keeper_dispatcher); + factory.registerCommand(jemalloc_enable_profile); + + FourLetterCommandPtr jemalloc_disable_profile = std::make_shared(keeper_dispatcher); + factory.registerCommand(jemalloc_disable_profile); +#endif + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -588,4 +607,37 @@ String YieldLeadershipCommand::run() return "Sent yield leadership request to leader."; } +#if USE_JEMALLOC + +void printToString(void * output, const char * data) +{ + std::string * output_data = reinterpret_cast(output); + *output_data += std::string(data); +} + +String JemallocDumpStats::run() +{ + std::string output; + malloc_stats_print(printToString, &output, nullptr); + return output; +} + +String JemallocFlushProfile::run() +{ + return flushJemallocProfile("/tmp/jemalloc_keeper"); +} + +String JemallocEnableProfile::run() +{ + setJemallocProfileActive(true); + return "ok"; +} + +String JemallocDisableProfile::run() +{ + setJemallocProfileActive(false); + return "ok"; +} +#endif + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 4702dd10415..7fc044881cf 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -428,4 +428,55 @@ struct YieldLeadershipCommand : public IFourLetterCommand ~YieldLeadershipCommand() override = default; }; +#if USE_JEMALLOC +struct JemallocDumpStats : public IFourLetterCommand +{ + explicit JemallocDumpStats(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "jmst"; } + String run() 
override; + ~JemallocDumpStats() override = default; + +}; + +struct JemallocFlushProfile : public IFourLetterCommand +{ + explicit JemallocFlushProfile(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "jmfp"; } + String run() override; + ~JemallocFlushProfile() override = default; +}; + +struct JemallocEnableProfile : public IFourLetterCommand +{ + explicit JemallocEnableProfile(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "jmep"; } + String run() override; + ~JemallocEnableProfile() override = default; +}; + +struct JemallocDisableProfile : public IFourLetterCommand +{ + explicit JemallocDisableProfile(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "jmdp"; } + String run() override; + ~JemallocDisableProfile() override = default; +}; +#endif + } diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index dcd22552fe3..9c32d8a2ab7 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -15,16 +15,10 @@ #include #include #include -#include -#include #include #if USE_JEMALLOC -# include - -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - +#include #endif namespace CurrentMetrics @@ -33,12 +27,6 @@ namespace CurrentMetrics extern const Metric KeeperOutstandingRequets; } -namespace ProfileEvents -{ - extern const Event MemoryAllocatorPurge; - extern const Event MemoryAllocatorPurgeTimeMicroseconds; -} - using namespace std::chrono_literals; namespace DB @@ -986,11 +974,7 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const void KeeperDispatcher::cleanResources() { #if USE_JEMALLOC - LOG_TRACE(&Poco::Logger::get("KeeperDispatcher"), "Purging unused memory"); - Stopwatch watch; - mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds()); + purgeJemallocArenas(); #endif } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 965e743da39..26ee3668ef6 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -630,6 +630,10 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ const auto preprocess_logs = [&] { auto lock = raft_instance->lockRaft(); + + if (keeper_context->local_logs_preprocessed) + return; + keeper_context->local_logs_preprocessed = true; auto log_store = state_manager->load_log_store(); auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, log_store->next_slot()); diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 910615bf6ef..716184e07d0 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -70,7 +70,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo { std::lock_guard client_lock{snapshot_s3_client_mutex}; // if client is not changed (same auth settings, same endpoint) we don't need to update - if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings + if (snapshot_s3_client && snapshot_s3_client->client && !snapshot_s3_client->auth_settings.hasUpdates(auth_settings) && snapshot_s3_client->uri.uri == new_uri.uri) return; } diff --git a/src/Coordination/RaftServerConfig.h b/src/Coordination/RaftServerConfig.h index 451d61a436e..0ecbd6464c1 100644 --- a/src/Coordination/RaftServerConfig.h +++ b/src/Coordination/RaftServerConfig.h @@ -4,6 +4,8 @@ #include #include +#include + namespace DB { // default- and copy-constructible version of nuraft::srv_config diff --git a/src/Core/ExternalResultDescription.cpp b/src/Core/ExternalResultDescription.cpp index f7e8a69d355..87f189a4f86 100644 --- a/src/Core/ExternalResultDescription.cpp +++ b/src/Core/ExternalResultDescription.cpp @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include namespace DB @@ -41,6 +43,12 @@ void ExternalResultDescription::init(const Block & sample_block_) DataTypePtr type_not_nullable = removeNullable(elem.type); const IDataType * type = type_not_nullable.get(); + if (dynamic_cast(type->getCustomName())) + { + types.emplace_back(ValueType::vtPoint, is_nullable); + continue; + } + WhichDataType which(type); if (which.isUInt8()) diff --git a/src/Core/ExternalResultDescription.h b/src/Core/ExternalResultDescription.h index b7d852b99cf..1dbce289379 100644 --- a/src/Core/ExternalResultDescription.h +++ b/src/Core/ExternalResultDescription.h @@ -35,7 +35,8 @@ struct ExternalResultDescription vtDecimal128, vtDecimal256, vtArray, - vtFixedString + vtFixedString, + vtPoint, }; Block sample_block; diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index 2b46c3d14ad..7916f882979 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -188,4 +188,46 @@ String GTIDSets::toPayload() const return buffer.str(); } +bool GTIDSet::contains(const GTIDSet & gtid_set) const +{ + //we contain the other set if each of its intervals are contained in any of our intervals. 
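    //e.g. our {1-5, 10-20} contains the argument {2-3, 11-11}, but not {2-7}: 7 falls outside every interval of ours.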
+ //use the fact that intervals are sorted to make this linear instead of quadratic. + if (uuid != gtid_set.uuid) { return false; } + + auto mine = intervals.begin(), other = gtid_set.intervals.begin(); + auto my_end = intervals.end(), other_end = gtid_set.intervals.end(); + while (mine != my_end && other != other_end) + { + bool mine_contains_other = mine->start <= other->start && mine->end >= other->end; + if (mine_contains_other) + { + ++other; + } + else + { + ++mine; + } + } + + return other == other_end; //if we've iterated through all intervals in the argument, all its intervals are contained in this +} + +bool GTIDSets::contains(const GTIDSet & gtid_set) const +{ + for (const auto & my_gtid_set : sets) + { + if (my_gtid_set.contains(gtid_set)) { return true; } + } + return false; +} + +bool GTIDSets::contains(const GTIDSets & gtid_sets) const +{ + for (const auto & gtid_set : gtid_sets.sets) + { + if (!this->contains(gtid_set)) { return false; } + } + return true; +} + } diff --git a/src/Core/MySQL/MySQLGtid.h b/src/Core/MySQL/MySQLGtid.h index 45eeaf02fa2..b7cff39cca6 100644 --- a/src/Core/MySQL/MySQLGtid.h +++ b/src/Core/MySQL/MySQLGtid.h @@ -28,6 +28,8 @@ public: void tryMerge(size_t i); static void tryShrink(GTIDSet & set, unsigned int i, Interval & current); + + bool contains(const GTIDSet & gtid_set) const; }; class GTIDSets @@ -40,6 +42,31 @@ public: String toString() const; String toPayload() const; + bool contains(const GTIDSet & gtid_set) const; + bool contains(const GTIDSets & gtid_sets) const; }; +inline bool operator==(const GTID & left, const GTID & right) +{ + return left.uuid == right.uuid + && left.seq_no == right.seq_no; +} + +inline bool operator==(const GTIDSet::Interval & left, const GTIDSet::Interval & right) +{ + return left.start == right.start + && left.end == right.end; +} + +inline bool operator==(const GTIDSet & left, const GTIDSet & right) +{ + return left.uuid == right.uuid + && left.intervals == right.intervals; +} + +inline bool operator==(const GTIDSets & left, const GTIDSets & right) +{ + return left.sets == right.sets; +} + } diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index dcf42134b0b..403f98360c1 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -967,6 +967,59 @@ namespace MySQLReplication out << "[DryRun Event]" << '\n'; } + void UnparsedRowsEvent::dump(WriteBuffer & out) const + { + std::lock_guard lock(mutex); + header.dump(out); + out << "[UnparsedRowsEvent Event]" << '\n'; + out << "Unparsed Data Size: " << unparsed_data.size() << '\n'; + } + + void UnparsedRowsEvent::parseImpl(ReadBuffer & payload_) + { + char c = 0; + if (payload_.position() < payload_.buffer().end()) + unparsed_data.reserve(payload_.buffer().end() - payload_.position()); + /// Prevent reading after the end + /// payload.available() might have incorrect value + while (payload_.position() <= payload_.buffer().end() && payload_.read(c)) + unparsed_data.push_back(c); + if (!payload_.eof()) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data. 
Available {} bytes but not eof", payload_.available()); + } + + std::shared_ptr UnparsedRowsEvent::parse() + { + std::lock_guard lock(mutex); + if (!unparsed_data.empty()) + { + RowsEventHeader rows_header(header.type); + rows_header.table_id = table_id; + rows_header.flags = flags; + switch (header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + parsed_event = std::make_shared(table_map, EventHeader(header), rows_header); + break; + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + parsed_event = std::make_shared(table_map, EventHeader(header), rows_header); + break; + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + parsed_event = std::make_shared(table_map, EventHeader(header), rows_header); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown event type: {}", magic_enum::enum_name(header.type)); + } + ReadBufferFromMemory payload(unparsed_data.data(), unparsed_data.size()); + parsed_event->parseEvent(payload); + unparsed_data.clear(); + } + return parsed_event; + } + /// Update binlog name/position/gtid based on the event type. void Position::update(BinlogEventPtr event) { @@ -998,7 +1051,8 @@ namespace MySQLReplication case ROTATE_EVENT: { auto rotate = std::static_pointer_cast(event); binlog_name = rotate->next_binlog; - binlog_pos = event->header.log_pos; + /// If binlog name has changed, need to use position from next binlog + binlog_pos = rotate->position; break; } case GTID_EVENT: { @@ -1012,13 +1066,18 @@ namespace MySQLReplication default: throw ReplicationError(ErrorCodes::LOGICAL_ERROR, "Position update with unsupported event"); } + if (event->header.timestamp > 0) + { + timestamp = event->header.timestamp; + } } - void Position::update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_) + void Position::update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_, UInt32 binlog_time_) { binlog_pos = binlog_pos_; binlog_name = binlog_name_; gtid_sets.parse(gtid_sets_); + timestamp = binlog_time_; } void Position::dump(WriteBuffer & out) const diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h index 1584dbd42ac..6ba507245b3 100644 --- a/src/Core/MySQL/MySQLReplication.h +++ b/src/Core/MySQL/MySQLReplication.h @@ -181,6 +181,7 @@ namespace MySQLReplication MYSQL_WRITE_ROWS_EVENT = 2, MYSQL_UPDATE_ROWS_EVENT = 3, MYSQL_DELETE_ROWS_EVENT = 4, + MYSQL_UNPARSED_ROWS_EVENT = 100, }; class ReplicationError : public DB::Exception @@ -274,6 +275,8 @@ namespace MySQLReplication String status; String schema; String query; + String query_database_name; + String query_table_name; QueryType typ = QUERY_EVENT_DDL; bool transaction_complete = true; @@ -446,7 +449,6 @@ namespace MySQLReplication void parseImpl(ReadBuffer & payload) override; void parseRow(ReadBuffer & payload, Bitmap & bitmap); - private: std::shared_ptr table_map; }; @@ -497,17 +499,38 @@ namespace MySQLReplication void parseImpl(ReadBuffer & payload) override; }; + class UnparsedRowsEvent : public RowsEvent + { + public: + UnparsedRowsEvent(const std::shared_ptr & table_map_, EventHeader && header_, const RowsEventHeader & rows_header) + : RowsEvent(table_map_, std::move(header_), rows_header) + { + } + + void dump(WriteBuffer & out) const override; + MySQLEventType type() const override { return MYSQL_UNPARSED_ROWS_EVENT; } + std::shared_ptr parse(); + + protected: + void parseImpl(ReadBuffer & payload) override; + std::vector unparsed_data; + std::shared_ptr parsed_event; + mutable 
std::mutex mutex; + }; + class Position { public: UInt64 binlog_pos; String binlog_name; GTIDSets gtid_sets; + UInt32 timestamp; - Position() : binlog_pos(0) { } + Position() : binlog_pos(0), timestamp(0) { } void update(BinlogEventPtr event); - void update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_); + void update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_, UInt32 binlog_time_); void dump(WriteBuffer & out) const; + void resetPendingGTID() { pending_gtid.reset(); } private: std::optional pending_gtid; diff --git a/src/Core/MySQL/tests/gtest_MySQLGtid.cpp b/src/Core/MySQL/tests/gtest_MySQLGtid.cpp new file mode 100644 index 00000000000..e31a87aaa39 --- /dev/null +++ b/src/Core/MySQL/tests/gtest_MySQLGtid.cpp @@ -0,0 +1,40 @@ +#include +#include + +using namespace DB; + + +GTEST_TEST(GTIDSetsContains, Tests) +{ + GTIDSets gtid_set, + contained1, contained2, contained3, contained4, contained5, + not_contained1, not_contained2, not_contained3, not_contained4, not_contained5, not_contained6; + + gtid_set.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60"); + contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60"); + contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:2-3:11:47-49"); + contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:11"); + contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:47-49:60"); + contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:60"); + + not_contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-50, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60"); + not_contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:0-3:11:47-49"); + not_contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:99"); + not_contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:46-49:60"); + not_contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:99"); + not_contained6.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60, 00000000-0000-0000-0000-000000000000"); + + + ASSERT_TRUE(gtid_set.contains(contained1)); + ASSERT_TRUE(gtid_set.contains(contained2)); + ASSERT_TRUE(gtid_set.contains(contained3)); + ASSERT_TRUE(gtid_set.contains(contained4)); + ASSERT_TRUE(gtid_set.contains(contained5)); + + ASSERT_FALSE(gtid_set.contains(not_contained1)); + ASSERT_FALSE(gtid_set.contains(not_contained2)); + ASSERT_FALSE(gtid_set.contains(not_contained3)); + ASSERT_FALSE(gtid_set.contains(not_contained4)); + ASSERT_FALSE(gtid_set.contains(not_contained5)); + ASSERT_FALSE(gtid_set.contains(not_contained6)); +} diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index 2f041134f06..aa60bdee28a 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -24,6 +24,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } @@ -162,6 +163,8 @@ void insertPostgreSQLValue( assert_cast(column).insert(Array(dimensions[1].begin(), dimensions[1].end())); break; } + default: + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported value type"); } } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 58b7cbab4c9..0e6da579b10 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -204,6 +204,8 @@ class IColumn; M(Bool, input_format_parallel_parsing, true, "Enable parallel 
parsing for some data formats.", 0) \ M(UInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \ M(Bool, output_format_parallel_formatting, true, "Enable parallel formatting for some data formats.", 0) \ + M(UInt64, output_format_compression_level, 3, "Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when inserting to table function `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`.", 0) \ + M(UInt64, output_format_compression_zstd_window_log, 0, "Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression.", 0) \ \ M(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized.", 0) \ M(UInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized.", 0) \ @@ -352,6 +354,7 @@ class IColumn; M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \ + M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, query_profiler_cpu_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \ diff --git a/src/DataTypes/convertMySQLDataType.cpp b/src/DataTypes/convertMySQLDataType.cpp index bb848bf1526..fa91e86b43d 100644 --- a/src/DataTypes/convertMySQLDataType.cpp +++ b/src/DataTypes/convertMySQLDataType.cpp @@ -10,13 +10,15 @@ #include "DataTypeDate32.h" #include "DataTypeDateTime.h" #include "DataTypeDateTime64.h" -#include "DataTypeEnum.h" #include "DataTypesDecimal.h" #include "DataTypeFixedString.h" #include "DataTypeNullable.h" #include "DataTypeString.h" #include "DataTypesNumber.h" +#include "DataTypeCustomGeo.h" +#include "DataTypeFactory.h" #include "IDataType.h" +#include namespace DB { @@ -118,6 +120,10 @@ DataTypePtr convertMySQLDataType(MultiEnum type_support, else if (precision <= DecimalUtils::max_precision) res = std::make_shared>(precision, scale); } + else if (type_name == "point") + { + res = DataTypeFactory::instance().get("Point"); + } /// Also String is fallback for all unknown types. 
if (!res) diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 6810f655116..9d0395e4217 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -14,20 +14,12 @@ #include #include #include +#include #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace fs = std::filesystem; namespace DB diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 8973b533720..ba1b2cdacad 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -227,11 +227,17 @@ LoadTaskPtr DatabaseOrdinary::startupDatabaseAsync( LoadJobSet startup_after, LoadingStrictnessLevel /*mode*/) { - // NOTE: this task is empty, but it is required for correct dependency handling (startup should be done after tables loading) auto job = makeLoadJob( std::move(startup_after), TablesLoaderBackgroundStartupPoolId, - fmt::format("startup Ordinary database {}", getDatabaseName())); + fmt::format("startup Ordinary database {}", getDatabaseName()), + ignoreDependencyFailure, + [] (AsyncLoader &, const LoadJobPtr &) + { + // NOTE: this job is no-op, but it is required for correct dependency handling + // 1) startup should be done after tables loading + // 2) load or startup errors for tables should not lead to not starting up the whole database + }); return startup_database_task = makeLoadTask(async_loader, {job}); } diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index cbb080a0baa..99dd337189c 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -10,6 +10,7 @@ # include # include # include +# include # include # include # include @@ -39,10 +40,11 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL( const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, + const MySQLReplication::BinlogClientPtr & binlog_client_, std::unique_ptr settings_) : DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL(" + database_name_ + ")", context_) , settings(std::move(settings_)) - , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get()) + , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), binlog_client_, settings.get()) { } @@ -197,6 +199,7 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory) if (!engine->arguments) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name); + MySQLReplication::BinlogClientPtr binlog_client; StorageMySQL::Configuration configuration; ASTs & arguments = engine->arguments->children; auto mysql_settings = std::make_unique(); @@ -241,6 +244,12 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory) if (engine_define->settings) materialize_mode_settings->loadFromQuery(*engine_define); + if (materialize_mode_settings->use_binlog_client) + binlog_client = DB::MySQLReplication::BinlogClientFactory::instance().getClient( + configuration.host, configuration.port, configuration.username, configuration.password, + materialize_mode_settings->max_bytes_in_binlog_dispatcher_buffer, + materialize_mode_settings->max_flush_milliseconds_in_binlog_dispatcher); + if (args.uuid == 
UUIDHelpers::Nil) { auto print_create_ast = args.create_query.clone(); @@ -261,6 +270,7 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory) configuration.database, std::move(mysql_pool), std::move(client), + binlog_client, std::move(materialize_mode_settings)); }; factory.registerDatabase("MaterializeMySQL", create_fn); diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 895498723fd..4d7871d49d6 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ public: const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, + const MySQLReplication::BinlogClientPtr & binlog_client_, std::unique_ptr settings_); void rethrowExceptionIfNeeded() const; diff --git a/src/Databases/MySQL/MaterializedMySQLSettings.h b/src/Databases/MySQL/MaterializedMySQLSettings.h index 82342f8c76d..557d48be85b 100644 --- a/src/Databases/MySQL/MaterializedMySQLSettings.h +++ b/src/Databases/MySQL/MaterializedMySQLSettings.h @@ -17,6 +17,11 @@ class ASTStorage; M(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \ M(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \ M(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated.", 0) \ + M(Bool, use_binlog_client, false, "Use MySQL Binlog Client.", 0) \ + M(UInt64, max_bytes_in_binlog_queue, 64 * 1024 * 1024, "Max bytes in binlog's queue created from MySQL Binlog Client.", 0) \ + M(UInt64, max_milliseconds_to_wait_in_binlog_queue, 10000, "Max milliseconds to wait when max bytes exceeded in a binlog queue.", 0) \ + M(UInt64, max_bytes_in_binlog_dispatcher_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes in the binlog dispatcher's buffer before it is flushed to attached binlogs.", 0) \ + M(UInt64, max_flush_milliseconds_in_binlog_dispatcher, 1000, "Max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlogs.", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS) diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 14cd89e1ff6..5834fb96dc6 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -26,14 +26,13 @@ #include #include #include -#include +#include #include #include #include #include #include #include -#include namespace DB { @@ -48,8 +47,43 @@ namespace ErrorCodes extern const int UNKNOWN_DATABASE; extern const int UNKNOWN_EXCEPTION; extern const int CANNOT_READ_ALL_DATA; + extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int NETWORK_ERROR; + extern const int UNKNOWN_TABLE; + extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int THERE_IS_NO_QUERY; + extern const int QUERY_WAS_CANCELLED; + extern const int TABLE_ALREADY_EXISTS; + extern const int DATABASE_ALREADY_EXISTS; + extern const int DATABASE_NOT_EMPTY; + extern const int TABLE_IS_DROPPED; + extern const int TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT; + extern const int CANNOT_CREATE_CHARSET_CONVERTER; + extern 
const int UNKNOWN_FUNCTION; + extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_TYPE; + extern const int TIMEOUT_EXCEEDED; + extern const int MEMORY_LIMIT_EXCEEDED; + extern const int MYSQL_SYNTAX_ERROR; } +// USE MySQL ERROR CODE: +// https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html +constexpr int ER_ACCESS_DENIED_ERROR = 1045; /// NOLINT +constexpr int ER_DBACCESS_DENIED_ERROR = 1044; /// NOLINT +constexpr int ER_BAD_DB_ERROR = 1049; /// NOLINT +constexpr int ER_MASTER_HAS_PURGED_REQUIRED_GTIDS = 1789; /// NOLINT +constexpr int ER_MASTER_FATAL_ERROR_READING_BINLOG = 1236; /// NOLINT + +// https://dev.mysql.com/doc/mysql-errors/8.0/en/client-error-reference.html +constexpr int CR_CONN_HOST_ERROR = 2003; /// NOLINT +constexpr int CR_SERVER_GONE_ERROR = 2006; /// NOLINT +constexpr int CR_SERVER_LOST = 2013; /// NOLINT +constexpr int ER_SERVER_SHUTDOWN = 1053; /// NOLINT +constexpr int ER_LOCK_DEADLOCK = 1213; /// NOLINT +constexpr int ER_LOCK_WAIT_TIMEOUT = 1205; /// NOLINT +constexpr int ER_OPTION_PREVENTS_STATEMENT = 1290; /// NOLINT + static constexpr auto MYSQL_BACKGROUND_THREAD_NAME = "MySQLDBSync"; static ContextMutablePtr createQueryContext(ContextPtr context) @@ -157,12 +191,68 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S } } +static bool shouldReconnectOnException(const std::exception_ptr & e) +{ + try + { + std::rethrow_exception(e); + } + catch (const mysqlxx::ConnectionFailed &) {} /// NOLINT + catch (const mysqlxx::ConnectionLost &) {} /// NOLINT + catch (const Poco::Net::ConnectionResetException &) {} /// NOLINT + catch (const Poco::Net::ConnectionRefusedException &) {} /// NOLINT + catch (const DB::NetException &) {} /// NOLINT + catch (const Poco::Net::NetException & e) + { + if (e.code() != POCO_ENETDOWN && + e.code() != POCO_ENETUNREACH && + e.code() != POCO_ENETRESET && + e.code() != POCO_ESYSNOTREADY) + return false; + } + catch (const mysqlxx::BadQuery & e) + { + // Lost connection to MySQL server during query + if (e.code() != CR_SERVER_LOST && + e.code() != ER_SERVER_SHUTDOWN && + e.code() != CR_SERVER_GONE_ERROR && + e.code() != CR_CONN_HOST_ERROR && + e.code() != ER_LOCK_DEADLOCK && + e.code() != ER_LOCK_WAIT_TIMEOUT && + e.code() != ER_OPTION_PREVENTS_STATEMENT) + return false; + } + catch (const mysqlxx::Exception & e) + { + // ER_SERVER_SHUTDOWN is thrown in different types under different conditions. + // E.g. checkError() in Common/mysqlxx/Exception.cpp will throw mysqlxx::Exception. + if (e.code() != CR_SERVER_LOST && e.code() != ER_SERVER_SHUTDOWN && e.code() != CR_SERVER_GONE_ERROR && e.code() != CR_CONN_HOST_ERROR) + return false; + } + catch (const Poco::Exception & e) + { + if (e.code() != ErrorCodes::NETWORK_ERROR && + e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED && + e.code() != ErrorCodes::UNKNOWN_TABLE && // Since we have ignored the DDL exception when the tables without primary key, insert into those tables will get UNKNOWN_TABLE. + e.code() != ErrorCodes::CANNOT_READ_ALL_DATA && + e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF && + e.code() != ErrorCodes::TIMEOUT_EXCEEDED) + return false; + } + catch (...) 
+ { + return false; + } + return true; +} + MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( ContextPtr context_, const String & database_name_, const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, + const MySQLReplication::BinlogClientPtr & binlog_client_, MaterializedMySQLSettings * settings_) : WithContext(context_->getGlobalContext()) , log(&Poco::Logger::get("MaterializedMySQLSyncThread")) @@ -170,6 +260,7 @@ MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( , mysql_database_name(mysql_database_name_) , pool(std::move(pool_)) /// NOLINT , client(std::move(client_)) + , binlog_client(binlog_client_) , settings(settings_) { query_prefix = "EXTERNAL DDL FROM MySQL(" + backQuoteIfNeed(database_name) + ", " + backQuoteIfNeed(mysql_database_name) + ") "; @@ -216,14 +307,23 @@ void MaterializedMySQLSyncThread::synchronization() UInt64 elapsed_ms = watch.elapsedMilliseconds(); if (elapsed_ms < max_flush_time) { - BinlogEventPtr binlog_event = client.readOneBinlogEvent(max_flush_time - elapsed_ms); - if (binlog_event) + const auto timeout_ms = max_flush_time - elapsed_ms; + BinlogEventPtr binlog_event; + if (binlog) + binlog->tryReadEvent(binlog_event, timeout_ms); + else + binlog_event = client.readOneBinlogEvent(timeout_ms); + if (binlog_event && !ignoreEvent(binlog_event)) onEvent(buffers, binlog_event, metadata); } } catch (const Exception & e) { - if (e.code() != ErrorCodes::CANNOT_READ_ALL_DATA || settings->max_wait_time_when_mysql_unavailable < 0) + if (settings->max_wait_time_when_mysql_unavailable < 0) + throw; + bool binlog_was_purged = e.code() == ER_MASTER_FATAL_ERROR_READING_BINLOG || + e.code() == ER_MASTER_HAS_PURGED_REQUIRED_GTIDS; + if (!binlog_was_purged && !shouldReconnectOnException(std::current_exception())) throw; flushBuffersData(buffers, metadata); @@ -246,6 +346,7 @@ void MaterializedMySQLSyncThread::synchronization() catch (...) 
{ client.disconnect(); + binlog = nullptr; tryLogCurrentException(log); setSynchronizationThreadException(std::current_exception()); } @@ -259,6 +360,7 @@ void MaterializedMySQLSyncThread::stopSynchronization() if (background_thread_pool->joinable()) background_thread_pool->join(); client.disconnect(); + binlog = nullptr; } } @@ -428,14 +530,6 @@ static inline void dumpDataForTables( } } -static inline UInt32 randomNumber() -{ - pcg64_fast rng{randomSeed()}; - std::uniform_int_distribution dist6( - std::numeric_limits::min(), std::numeric_limits::max()); - return static_cast(dist6(rng)); -} - bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata) { bool opened_transaction = false; @@ -463,7 +557,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta if (!need_dumping_tables.empty()) { Position position; - position.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set); + position.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set, 0); metadata.transaction(position, [&]() { @@ -487,8 +581,20 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta if (opened_transaction) connection->query("COMMIT").execute(); - client.connect(); - client.startBinlogDumpGTID(randomNumber(), mysql_database_name, materialized_tables_list, metadata.executed_gtid_set, metadata.binlog_checksum); + if (binlog_client) + { + binlog_client->setBinlogChecksum(metadata.binlog_checksum); + binlog = binlog_client->createBinlog(metadata.executed_gtid_set, + database_name, + {mysql_database_name}, + settings->max_bytes_in_binlog_queue, + settings->max_milliseconds_to_wait_in_binlog_queue); + } + else + { + client.connect(); + client.startBinlogDumpGTID(randomNumber(), mysql_database_name, materialized_tables_list, metadata.executed_gtid_set, metadata.binlog_checksum); + } setSynchronizationThreadException(nullptr); return true; @@ -500,20 +606,11 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta if (opened_transaction) connection->query("ROLLBACK").execute(); - try - { + if (settings->max_wait_time_when_mysql_unavailable < 0) + throw; + + if (!shouldReconnectOnException(std::current_exception())) throw; - } - catch (const mysqlxx::ConnectionFailed & ex) - { - LOG_TRACE(log, "Connection to MySQL failed {}", ex.displayText()); - } - catch (const mysqlxx::BadQuery & e) - { - // Lost connection to MySQL server during query - if (e.code() != CR_SERVER_LOST || settings->max_wait_time_when_mysql_unavailable < 0) - throw; - } setSynchronizationThreadException(std::current_exception()); /// Avoid busy loop when MySQL is not available. 
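For orientation, the following minimal sketch (not part of the patch) shows how the binlog-client API introduced by this change is intended to be driven. All type and function names come from the new MySQLBinlogClient.h / MySQLBinlog.h headers added further below; the host, credentials, consumer name, database name, buffer sizes and the `consumeBinlog()` wrapper itself are illustrative placeholders.

    #include <Core/Names.h>
    #include <Databases/MySQL/MySQLBinlogClientFactory.h>

    using namespace DB;
    using namespace DB::MySQLReplication;

    /// Hypothetical driver: one BinlogClient (one MySQL connection) is shared per user@host:port;
    /// each consumer gets its own filtered event queue from createBinlog().
    void consumeBinlog(const String & executed_gtid_set)
    {
        BinlogClientPtr client = BinlogClientFactory::instance().getClient(
            "127.0.0.1", 3306, "user", "password",
            /* max_bytes_in_binlog_dispatcher_buffer */ 1024 * 1024,
            /* max_flush_milliseconds_in_binlog_dispatcher */ 1000);

        BinlogPtr binlog = client->createBinlog(
            executed_gtid_set, "consumer_1", NameSet{"mysql_db"},
            /* max_bytes_in_binlog_queue */ 64 * 1024 * 1024,
            /* max_milliseconds_to_wait_in_binlog_queue */ 10000);

        BinlogEventPtr event;
        while (binlog->tryReadEvent(event, /* timeout, ms */ 1000))
        {
            if (!event)
                continue;
            /// Handle the event; MaterializedMySQLSyncThread::onEvent() is the real consumer.
            /// binlog->getPosition() gives the position to persist together with the flushed data.
        }
    }
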
@@ -524,17 +621,55 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta return false; } +bool MaterializedMySQLSyncThread::isTableIgnored(const String & table_name) const +{ + return !materialized_tables_list.empty() && !materialized_tables_list.contains(table_name); +} + +bool MaterializedMySQLSyncThread::ignoreEvent(const BinlogEventPtr & event) const +{ + switch (event->type()) + { + case MYSQL_WRITE_ROWS_EVENT: + case MYSQL_DELETE_ROWS_EVENT: + case MYSQL_UPDATE_ROWS_EVENT: + case MYSQL_UNPARSED_ROWS_EVENT: + { + auto table_name = static_cast(*event).table; + if (!table_name.empty() && isTableIgnored(table_name)) + { + switch (event->header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown event type: {}", magic_enum::enum_name(event->header.type)); + } + return true; + } + } break; + default: + break; + } + return false; +} + void MaterializedMySQLSyncThread::flushBuffersData(Buffers & buffers, MaterializeMetadata & metadata) { if (buffers.data.empty()) return; - metadata.transaction(client.getPosition(), [&]() { buffers.commit(getContext()); }); + metadata.transaction(getPosition(), [&]() { buffers.commit(getContext()); }); const auto & position_message = [&]() { WriteBufferFromOwnString buf; - client.getPosition().dump(buf); + getPosition().dump(buf); return buf.str(); }; LOG_INFO(log, "MySQL executed position: \n {}", position_message()); @@ -783,10 +918,33 @@ void MaterializedMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPt else if (receive_event->type() == MYSQL_QUERY_EVENT) { QueryEvent & query_event = static_cast(*receive_event); + /// Skip events for different databases if any + if (!query_event.query_database_name.empty() && query_event.query_database_name != mysql_database_name) + { + LOG_WARNING( + log, + "Skipped QueryEvent, current mysql database name: {}, ddl schema: {}, query: {}", + mysql_database_name, + query_event.query_database_name, + query_event.query); + return; + } + if (!query_event.query_table_name.empty() && isTableIgnored(query_event.query_table_name)) + { + LOG_WARNING(log, "Due to the table filter rules, query_event on {} is ignored.", database_name); + return; + } + Position position_before_ddl; - position_before_ddl.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set); + position_before_ddl.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set, query_event.header.timestamp); metadata.transaction(position_before_ddl, [&]() { buffers.commit(getContext()); }); - metadata.transaction(client.getPosition(),[&](){ executeDDLAtomic(query_event); }); + metadata.transaction(getPosition(),[&]() { executeDDLAtomic(query_event); }); + } + else if (receive_event->type() == MYSQL_UNPARSED_ROWS_EVENT) + { + UnparsedRowsEvent & unparsed_event = static_cast(*receive_event); + auto nested_event = unparsed_event.parse(); + onEvent(buffers, nested_event, metadata); } else { @@ -796,7 +954,10 @@ void MaterializedMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPt /// Some behaviors(such as changing the value of "binlog_checksum") rotate the binlog file. 
/// To ensure that the synchronization continues, we need to handle these events metadata.fetchMasterVariablesValue(pool.get(/* wait_timeout= */ UINT64_MAX)); - client.setBinlogChecksum(metadata.binlog_checksum); + if (binlog_client) + binlog_client->setBinlogChecksum(metadata.binlog_checksum); + else + client.setBinlogChecksum(metadata.binlog_checksum); } else if (receive_event->header.type != HEARTBEAT_EVENT) { @@ -827,7 +988,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even auto table_id = tryParseTableIDFromDDL(query, query_event.schema); if (!table_id.table_name.empty()) { - if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name)) + if (table_id.database_name != mysql_database_name || isTableIgnored(table_id.table_name)) { LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query); return; @@ -845,8 +1006,28 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even tryLogCurrentException(log); /// If some DDL query was not successfully parsed and executed - /// Then replication may fail on next binlog events anyway - if (exception.code() != ErrorCodes::SYNTAX_ERROR) + /// Then replication may fail on next binlog events anyway. + /// We can skip the error binlog evetns and continue to execute the right ones. + /// eg. The user creates a table without primary key and finds it is wrong, then + /// drops it and creates a new right one. We guarantee the right one can be executed. + + if (exception.code() != ErrorCodes::SYNTAX_ERROR && + exception.code() != ErrorCodes::MYSQL_SYNTAX_ERROR && + exception.code() != ErrorCodes::NOT_IMPLEMENTED && + exception.code() != ErrorCodes::UNKNOWN_TABLE && + exception.code() != ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY && + exception.code() != ErrorCodes::THERE_IS_NO_QUERY && + exception.code() != ErrorCodes::QUERY_WAS_CANCELLED && + exception.code() != ErrorCodes::TABLE_ALREADY_EXISTS && + exception.code() != ErrorCodes::UNKNOWN_DATABASE && + exception.code() != ErrorCodes::DATABASE_ALREADY_EXISTS && + exception.code() != ErrorCodes::DATABASE_NOT_EMPTY && + exception.code() != ErrorCodes::TABLE_IS_DROPPED && + exception.code() != ErrorCodes::TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT && + exception.code() != ErrorCodes::CANNOT_CREATE_CHARSET_CONVERTER && + exception.code() != ErrorCodes::UNKNOWN_FUNCTION && + exception.code() != ErrorCodes::UNKNOWN_IDENTIFIER && + exception.code() != ErrorCodes::UNKNOWN_TYPE) throw; } } diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index 4abea5e72df..004a4d67d32 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -11,6 +11,7 @@ # include # include # include +# include # include # include # include @@ -45,6 +46,7 @@ public: const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, + const MySQLReplication::BinlogClientPtr & binlog_client_, MaterializedMySQLSettings * settings_); void stopSynchronization(); @@ -61,19 +63,12 @@ private: mutable mysqlxx::Pool pool; mutable MySQLClient client; + BinlogClientPtr binlog_client; + BinlogPtr binlog; MaterializedMySQLSettings * settings; String query_prefix; NameSet materialized_tables_list; - // USE MySQL ERROR CODE: - // https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html - const int ER_ACCESS_DENIED_ERROR = 1045; /// NOLINT - const int 
ER_DBACCESS_DENIED_ERROR = 1044; /// NOLINT - const int ER_BAD_DB_ERROR = 1049; /// NOLINT - - // https://dev.mysql.com/doc/mysql-errors/8.0/en/client-error-reference.html - const int CR_SERVER_LOST = 2013; /// NOLINT - struct Buffers { String database; @@ -99,12 +94,16 @@ private: BufferAndSortingColumnsPtr getTableDataBuffer(const String & table, ContextPtr context); }; + Position getPosition() const { return binlog ? binlog->getPosition() : client.getPosition(); } void synchronization(); bool isCancelled() { return sync_quit.load(std::memory_order_relaxed); } bool prepareSynchronized(MaterializeMetadata & metadata); + bool isTableIgnored(const String & table_name) const; + bool ignoreEvent(const BinlogEventPtr & event) const; + void flushBuffersData(Buffers & buffers, MaterializeMetadata & metadata); void onEvent(Buffers & buffers, const MySQLReplication::BinlogEventPtr & event, MaterializeMetadata & metadata); diff --git a/src/Databases/MySQL/MySQLBinlog.cpp b/src/Databases/MySQL/MySQLBinlog.cpp new file mode 100644 index 00000000000..3e3aca220bb --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlog.cpp @@ -0,0 +1,500 @@ +#include "MySQLBinlog.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +using namespace Replication; +using namespace Authentication; +using namespace ConnectionPhase; + +namespace ErrorCodes +{ + extern const int UNKNOWN_EXCEPTION; + extern const int UNKNOWN_PACKET_FROM_SERVER; + extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int CANNOT_READ_ALL_DATA; + extern const int LOGICAL_ERROR; + extern const int NETWORK_ERROR; +} + +namespace MySQLReplication +{ + +class WriteCommand : public IMySQLWritePacket +{ +public: + const char command; + const String query; + + WriteCommand(char command_, const String & query_) : command(command_), query(query_) { } + + size_t getPayloadSize() const override { return 1 + query.size(); } + + void writePayloadImpl(WriteBuffer & buffer) const override + { + buffer.write(command); + if (!query.empty()) + buffer.write(query.data(), query.size()); + } +}; + +IBinlog::Checksum IBinlog::checksumFromString(const String & checksum) +{ + auto str = Poco::toUpper(checksum); + if (str == "CRC32") + return IBinlog::CRC32; + if (str != "NONE") + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown checksum: {}", checksum); + return IBinlog::NONE; +} + +void BinlogParser::setChecksum(Checksum checksum) +{ + switch (checksum) + { + case Checksum::CRC32: + checksum_signature_length = 4; + break; + case Checksum::NONE: + checksum_signature_length = 0; + break; + } +} + +void BinlogParser::parseEvent(EventHeader & event_header, ReadBuffer & event_payload) +{ + switch (event_header.type) + { + case FORMAT_DESCRIPTION_EVENT: + { + event = std::make_shared(EventHeader(event_header)); + event->parseEvent(event_payload); + break; + } + case ROTATE_EVENT: + { + event = std::make_shared(EventHeader(event_header)); + event->parseEvent(event_payload); + break; + } + case QUERY_EVENT: + { + event = std::make_shared(EventHeader(event_header)); + event->parseEvent(event_payload); + + auto query = std::static_pointer_cast(event); + switch (query->typ) + { + case QUERY_EVENT_MULTI_TXN_FLAG: + case QUERY_EVENT_XA: + case QUERY_SAVEPOINT: + { + event = std::make_shared(EventHeader(query->header)); + break; + } + default: + { + String quoted_query = query->query; + 
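/// Normalize a copy of the DDL text so it can be parsed reliably, and extract the
+ /// database/table it targets; query_database_name / query_table_name are used later
+ /// to skip DDL for databases and tables that are not replicated.
+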
tryQuoteUnrecognizedTokens(quoted_query); + tryConvertStringLiterals(quoted_query); + auto table_id = tryParseTableIDFromDDL(query->query, query->schema); + query->query_database_name = table_id.database_name; + query->query_table_name = table_id.table_name; + break; + } + } + break; + } + case XID_EVENT: + { + event = std::make_shared(EventHeader(event_header)); + event->parseEvent(event_payload); + break; + } + case TABLE_MAP_EVENT: + { + TableMapEventHeader map_event_header; + map_event_header.parse(event_payload); + event = std::make_shared(EventHeader(event_header), map_event_header, flavor_charset); + try + { + event->parseEvent(event_payload); + auto table_map = std::static_pointer_cast(event); + table_maps[table_map->table_id] = table_map; + } + catch (const Poco::Exception & exc) + { + /// Ignore parsing issues + if (exc.code() != ErrorCodes::UNKNOWN_EXCEPTION) + throw; + event = std::make_shared(std::move(event_header)); + event->parseEvent(event_payload); + } + break; + } + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + { + RowsEventHeader rows_header(event_header.type); + rows_header.parse(event_payload); + if (table_maps.contains(rows_header.table_id)) + event = std::make_shared(table_maps.at(rows_header.table_id), EventHeader(event_header), rows_header); + else + event = std::make_shared(std::move(event_header)); + event->parseEvent(event_payload); + if (rows_header.flags & ROWS_END_OF_STATEMENT) + table_maps.clear(); + break; + } + case GTID_EVENT: + { + event = std::make_shared(EventHeader(event_header)); + event->parseEvent(event_payload); + break; + } + default: + { + event = std::make_shared(EventHeader(event_header)); + event->parseEvent(event_payload); + break; + } + } + updatePosition(event, position); +} + +void BinlogParser::updatePosition(const BinlogEventPtr & event, Position & position) +{ + const UInt64 binlog_pos_prev = position.binlog_pos; + position.binlog_pos = event->header.log_pos; + if (event->header.timestamp > 0) + position.timestamp = event->header.timestamp; + + switch (event->header.type) + { + case QUERY_EVENT: + if (event->type() == MYSQL_UNHANDLED_EVENT) + break; + [[fallthrough]]; + case GTID_EVENT: + case XID_EVENT: + case ROTATE_EVENT: + position.update(event); + break; + default: + break; + } + + if (event->header.type != ROTATE_EVENT) + { + /// UInt32 overflow when Pos > End_log_pos + /// https://dev.mysql.com/doc/refman/8.0/en/show-binlog-events.html + /// binlog_pos - The position at which the next event begins, which is equal to Pos plus the size of the event + const UInt64 binlog_pos_correct = binlog_pos_prev + event->header.event_size; + if (position.binlog_pos < binlog_pos_prev && binlog_pos_correct > std::numeric_limits::max()) + position.binlog_pos = binlog_pos_correct; + } +} + +bool BinlogParser::isNew(const Position & older, const Position & newer) +{ + if (older.gtid_sets.contains(newer.gtid_sets)) + return false; + /// Check if all sets in newer position have the same UUID from older sets + std::set older_set; + for (const auto & set : older.gtid_sets.sets) + older_set.insert(set.uuid); + for (const auto & set : newer.gtid_sets.sets) + { + if (!older_set.contains(set.uuid)) + return false; + } + return true; +} + +void BinlogFromSocket::connect(const String & host, UInt16 port, const String & user, const String & password) +{ + if (connected) + disconnect(); + + const Poco::Timespan 
connection_timeout(10'000'000'000); + const Poco::Timespan receive_timeout(5'000'000'000); + const Poco::Timespan send_timeout(5'000'000'000); + + socket = std::make_unique(); + address = DNSResolver::instance().resolveAddress(host, port); + socket->connect(*address, connection_timeout); + socket->setReceiveTimeout(receive_timeout); + socket->setSendTimeout(send_timeout); + socket->setNoDelay(true); + connected = true; + + in = std::make_unique(*socket); + out = std::make_unique(*socket); + packet_endpoint = std::make_shared(*in, *out, sequence_id); + + handshake(user, password); +} + +void BinlogFromSocket::disconnect() +{ + in = nullptr; + out = nullptr; + if (socket) + socket->close(); + socket = nullptr; + connected = false; + sequence_id = 0; + + GTIDSets sets; + position.gtid_sets = sets; + position.resetPendingGTID(); +} + +/// https://dev.mysql.com/doc/internals/en/connection-phase-packets.html +void BinlogFromSocket::handshake(const String & user, const String & password) +{ + const String mysql_native_password = "mysql_native_password"; + Handshake handshake; + packet_endpoint->receivePacket(handshake); + if (handshake.auth_plugin_name != mysql_native_password) + { + throw Exception( + ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, + "Only support {} auth plugin name, but got {}", + mysql_native_password, + handshake.auth_plugin_name); + } + + Native41 native41(password, handshake.auth_plugin_data); + String auth_plugin_data = native41.getAuthPluginData(); + + const UInt8 charset_utf8 = 33; + HandshakeResponse handshake_response( + client_capabilities, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password); + packet_endpoint->sendPacket(handshake_response, true); + + ResponsePacket packet_response(client_capabilities, true); + packet_endpoint->receivePacket(packet_response); + packet_endpoint->resetSequenceId(); + + if (packet_response.getType() == PACKET_ERR) + throw Exception::createDeprecated(packet_response.err.error_message, ErrorCodes::UNKNOWN_PACKET_FROM_SERVER); + else if (packet_response.getType() == PACKET_AUTH_SWITCH) + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Access denied for user {}", user); +} + +void BinlogFromSocket::writeCommand(char command, const String & query) +{ + WriteCommand write_command(command, query); + packet_endpoint->sendPacket(write_command, true); + + ResponsePacket packet_response(client_capabilities); + packet_endpoint->receivePacket(packet_response); + switch (packet_response.getType()) + { + case PACKET_ERR: + throw Exception::createDeprecated(packet_response.err.error_message, ErrorCodes::UNKNOWN_PACKET_FROM_SERVER); + case PACKET_OK: + break; + default: + break; + } + packet_endpoint->resetSequenceId(); +} + +void BinlogFromSocket::registerSlaveOnMaster(UInt32 slave_id) +{ + RegisterSlave register_slave(slave_id); + packet_endpoint->sendPacket(register_slave, true); + + ResponsePacket packet_response(client_capabilities); + packet_endpoint->receivePacket(packet_response); + packet_endpoint->resetSequenceId(); + if (packet_response.getType() == PACKET_ERR) + throw Exception::createDeprecated(packet_response.err.error_message, ErrorCodes::UNKNOWN_PACKET_FROM_SERVER); +} + +void BinlogFromSocket::start(UInt32 slave_id, const String & executed_gtid_set) +{ + if (!connected) + return; + + /// Maybe CRC32 or NONE. mysqlbinlog.cc use NONE, see its below comments: + /// Make a notice to the server that this client is checksum-aware. + /// It does not need the first fake Rotate necessary checksummed. 
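+ /// Here CRC32 is requested; the 4-byte checksum trailer appended to each event is
+ /// stripped in tryReadEvent() via MySQLBinlogEventReadBuffer(payload, checksum_signature_length).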
+ writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = 'CRC32'"); + + /// Set heartbeat 1s + const UInt64 period_ns = 1'000'000'000; + writeCommand(Command::COM_QUERY, "SET @master_heartbeat_period = " + std::to_string(period_ns)); + + /// Register slave. + registerSlaveOnMaster(slave_id); + + position.gtid_sets = {}; + position.gtid_sets.parse(executed_gtid_set); + + BinlogDumpGTID binlog_dump(slave_id, position.gtid_sets.toPayload()); + packet_endpoint->sendPacket(binlog_dump, true); +} + +class ReadPacketFromSocket : public IMySQLReadPacket +{ +public: + using ReadPayloadFunc = std::function; + explicit ReadPacketFromSocket(ReadPayloadFunc fn) : read_payload_func(std::move(fn)) { } + void readPayloadImpl(ReadBuffer & payload) override; + ReadPayloadFunc read_payload_func; +}; + +void ReadPacketFromSocket::readPayloadImpl(ReadBuffer & payload) +{ + if (payload.eof()) + throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF."); + + UInt8 header = static_cast(*payload.position()); + switch (header) // NOLINT(bugprone-switch-missing-default-case) + { + case PACKET_EOF: + throw ReplicationError(ErrorCodes::CANNOT_READ_ALL_DATA, "Master maybe lost"); + case PACKET_ERR: + { + ERRPacket err; + err.readPayloadWithUnpacked(payload); + throw ReplicationError::createDeprecated(err.error_message, ErrorCodes::UNKNOWN_EXCEPTION); + } + default: + break; + } + /// Skip the generic response packets header flag + payload.ignore(1); + read_payload_func(payload); +} + +bool BinlogFromSocket::tryReadEvent(BinlogEventPtr & to, UInt64 ms) +{ + ReadPacketFromSocket packet([this](ReadBuffer & payload) + { + MySQLBinlogEventReadBuffer event_payload(payload, checksum_signature_length); + + EventHeader event_header; + event_header.parse(event_payload); + + parseEvent(event_header, event_payload); + }); + + if (packet_endpoint && packet_endpoint->tryReceivePacket(packet, ms)) + { + to = event; + return static_cast(to); + } + + return false; +} + +void BinlogFromFile::open(const String & filename) +{ + in = std::make_unique(filename); + assertString("\xfe\x62\x69\x6e", *in); /// magic number +} + +bool BinlogFromFile::tryReadEvent(BinlogEventPtr & to, UInt64 /*ms*/) +{ + if (in && !in->eof()) + { + EventHeader event_header; + event_header.parse(*in); + + LimitReadBuffer limit_read_buffer(*in, event_header.event_size - EVENT_HEADER_LENGTH, /* throw_exception */ false, /* exact_limit */ {}); + MySQLBinlogEventReadBuffer event_payload(limit_read_buffer, checksum_signature_length); + parseEvent(event_header, event_payload); + to = event; + return static_cast(to); + } + + return false; +} + +BinlogFromFileFactory::BinlogFromFileFactory(const String & filename_) + : filename(filename_) +{ +} + +BinlogPtr BinlogFromFileFactory::createBinlog(const String & executed_gtid_set) +{ + auto ret = std::make_shared(); + ret->open(filename); + if (!executed_gtid_set.empty()) + { + /// NOTE: Used for testing only! 
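+ /// Emulates starting from executed_gtid_set when replaying a binlog file: events are
+ /// skipped until the file position reaches the end of the (single) requested interval,
+ /// then the interval start is adjusted so getPosition() reports the caller's starting set.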
+ GTIDSets sets; + sets.parse(executed_gtid_set); + if (sets.sets.size() != 1 || sets.sets[0].intervals.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Too many intervals: {}", executed_gtid_set); + BinlogEventPtr event; + while (ret->tryReadEvent(event, 0)) + { + const auto & s = ret->getPosition().gtid_sets.sets; + if (!s.empty() && !s[0].intervals.empty() && s[0].intervals[0].end == sets.sets[0].intervals[0].end) + break; + } + + auto pos = ret->getPosition(); + pos.gtid_sets.sets.front().intervals.front().start = sets.sets.front().intervals.front().start; + ret->setPosition(pos); + } + return ret; +} + +BinlogFromSocketFactory::BinlogFromSocketFactory(const String & host_, UInt16 port_, const String & user_, const String & password_) + : host(host_) + , port(port_) + , user(user_) + , password(password_) +{ +} + +BinlogPtr BinlogFromSocketFactory::createBinlog(const String & executed_gtid_set) +{ + auto ret = std::make_shared(); + ret->connect(host, port, user, password); + ret->start(randomNumber(), executed_gtid_set); + auto pos = ret->getPosition(); + if (pos.gtid_sets.sets.empty() || pos.gtid_sets.sets.front().intervals.front().start != 1) + throw Exception(ErrorCodes::NETWORK_ERROR, "Could not create: Wrong executed_gtid_set: {} -> {}", executed_gtid_set, pos.gtid_sets.toString()); + return ret; +} + +/// Should be in MySQLReplication namespace +bool operator==(const Position & left, const Position & right) +{ + return left.binlog_name == right.binlog_name && + left.binlog_pos == right.binlog_pos && + left.gtid_sets == right.gtid_sets; +} + +} +} diff --git a/src/Databases/MySQL/MySQLBinlog.h b/src/Databases/MySQL/MySQLBinlog.h new file mode 100644 index 00000000000..0b8f7543590 --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlog.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +using namespace MySQLProtocol; +using namespace Generic; + +namespace MySQLReplication +{ + +class IBinlog +{ +public: + virtual ~IBinlog() = default; + virtual bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) = 0; + virtual Position getPosition() const = 0; + enum Checksum : UInt8 + { + NONE = 0, + CRC32 = 1 + }; + virtual void setChecksum(Checksum /*checksum*/) { } + static Checksum checksumFromString(const String & checksum); +}; + +using BinlogPtr = std::shared_ptr; + +class BinlogParser : public IBinlog +{ +public: + Position getPosition() const override { return position; } + void setPosition(const Position & position_) { position = position_; } + void setChecksum(Checksum checksum) override; + static void updatePosition(const BinlogEventPtr & event, Position & position); + /// Checks if \a older is older position than \a newer + static bool isNew(const Position & older, const Position & newer); + +protected: + Position position; + BinlogEventPtr event; + std::map> table_maps; + size_t checksum_signature_length = 4; + MySQLCharsetPtr flavor_charset = std::make_shared(); + void parseEvent(EventHeader & event_header, ReadBuffer & event_payload); +}; + +class BinlogFromSocket : public BinlogParser +{ +public: + void connect(const String & host, UInt16 port, const String & user, const String & password); + void start(UInt32 slave_id, const String & executed_gtid_set); + bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) override; + +private: + void disconnect(); + bool connected = false; + uint8_t sequence_id = 0; + const uint32_t client_capabilities = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION; + + std::unique_ptr in; + 
std::unique_ptr out; + std::unique_ptr socket; + std::optional address; + std::shared_ptr packet_endpoint; + + void handshake(const String & user, const String & password); + void registerSlaveOnMaster(UInt32 slave_id); + void writeCommand(char command, const String & query); +}; + +class BinlogFromFile : public BinlogParser +{ +public: + void open(const String & filename); + bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) override; + +private: + std::unique_ptr in; +}; + +class IBinlogFactory +{ +public: + virtual ~IBinlogFactory() = default; + virtual BinlogPtr createBinlog(const String & executed_gtid_set) = 0; +}; + +using BinlogFactoryPtr = std::shared_ptr; + +class BinlogFromFileFactory : public IBinlogFactory +{ +public: + BinlogFromFileFactory(const String & filename_); + BinlogPtr createBinlog(const String & executed_gtid_set) override; + +private: + const String filename; +}; + +class BinlogFromSocketFactory : public IBinlogFactory +{ +public: + BinlogFromSocketFactory(const String & host_, UInt16 port_, const String & user_, const String & password_); + BinlogPtr createBinlog(const String & executed_gtid_set) override; + +private: + const String host; + const UInt16 port; + const String user; + const String password; +}; + +bool operator==(const Position & left, const Position & right); + +} +} diff --git a/src/Databases/MySQL/MySQLBinlogClient.cpp b/src/Databases/MySQL/MySQLBinlogClient.cpp new file mode 100644 index 00000000000..e7d707f76ce --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlogClient.cpp @@ -0,0 +1,104 @@ +#include "MySQLBinlogClient.h" +#include + +namespace DB::ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB::MySQLReplication +{ + +BinlogClient::BinlogClient(const BinlogFactoryPtr & factory_, + const String & name, + UInt64 max_bytes_in_buffer_, + UInt64 max_flush_ms_) + : factory(factory_) + , binlog_client_name(name) + , max_bytes_in_buffer(max_bytes_in_buffer_) + , max_flush_ms(max_flush_ms_) + , logger(&Poco::Logger::get("BinlogClient(" + name + ")")) +{ +} + +BinlogPtr BinlogClient::createBinlog(const String & executed_gtid_set, + const String & name, + const NameSet & mysql_database_names, + size_t max_bytes, + UInt64 max_waiting_ms) +{ + std::lock_guard lock(mutex); + BinlogPtr ret; + for (auto it = dispatchers.begin(); it != dispatchers.end();) + { + auto & dispatcher = *it; + if (!ret) + { + const auto metadata = dispatcher->getDispatcherMetadata(); + LOG_DEBUG(logger, "({} -> {}): Trying dispatcher: {}, size: {} -> {}:{}.{}", + name, executed_gtid_set, metadata.name, metadata.binlogs.size(), + metadata.position.binlog_name, metadata.position.gtid_sets.toString(), metadata.position.binlog_pos); + ret = dispatcher->attach(executed_gtid_set, name, mysql_database_names, max_bytes, max_waiting_ms); + if (ret) + LOG_DEBUG(logger, "({} -> {}): Reused dispatcher: {}, size: {} -> {}:{}.{}", + name, executed_gtid_set, metadata.name, metadata.binlogs.size(), + metadata.position.binlog_name, metadata.position.gtid_sets.toString(), metadata.position.binlog_pos); + } + + if (dispatcher->cleanupBinlogsAndStop()) + { + const auto metadata = dispatcher->getDispatcherMetadata(); + LOG_DEBUG(logger, "({} -> {}): Deleting dispatcher: {}, size: {}, total dispatchers: {}", + name, executed_gtid_set, metadata.name, metadata.binlogs.size(), dispatchers.size()); + it = dispatchers.erase(it); + continue; + } + ++it; + } + + if (!ret) + { + String dispatcher_name = name + ":" + std::to_string(dispatchers_count++); + LOG_DEBUG(logger, "({} -> {}): Creating 
dispatcher: {}, total dispatchers: {}", + name, executed_gtid_set, dispatcher_name, dispatchers.size()); + auto dispatcher = std::make_shared(dispatcher_name, max_bytes_in_buffer, max_flush_ms); + if (!binlog_checksum.empty()) + dispatcher->setBinlogChecksum(binlog_checksum); + for (const auto & it : dispatchers) + dispatcher->syncTo(it); + ret = dispatcher->start(factory->createBinlog(executed_gtid_set), name, mysql_database_names, max_bytes, max_waiting_ms); + if (!ret) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not create binlog: {}", executed_gtid_set); + dispatchers.push_back(dispatcher); + } + + return ret; +} + +BinlogClient::Metadata BinlogClient::getMetadata() const +{ + std::lock_guard lock(mutex); + Metadata ret; + ret.binlog_client_name = binlog_client_name; + for (const auto & dispatcher : dispatchers) + { + auto metadata = dispatcher->getDispatcherMetadata(); + if (!metadata.binlogs.empty()) + ret.dispatchers.push_back(metadata); + } + return ret; +} + +void BinlogClient::setBinlogChecksum(const String & checksum) +{ + std::lock_guard lock(mutex); + if (binlog_checksum != checksum) + { + LOG_DEBUG(logger, "Setting binlog_checksum: {} -> {}, total dispatchers: {}", binlog_checksum, checksum, dispatchers.size()); + binlog_checksum = checksum; + for (const auto & dispatcher : dispatchers) + dispatcher->setBinlogChecksum(checksum); + } +} + +} diff --git a/src/Databases/MySQL/MySQLBinlogClient.h b/src/Databases/MySQL/MySQLBinlogClient.h new file mode 100644 index 00000000000..b76934d08cf --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlogClient.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +namespace DB::MySQLReplication +{ + +/** It is supposed to reduce the number of connections to remote MySQL binlog by reusing one connection between several consumers. + * Such reusing of the connection makes the time of reading from the remote binlog independent to number of the consumers. + * It tracks a list of BinlogEventsDispatcher instances for consumers with different binlog position. + * The dispatchers with the same binlog position will be merged to one. 
+ */ +class BinlogClient +{ +public: + BinlogClient(const BinlogFactoryPtr & factory, + const String & name = {}, + UInt64 max_bytes_in_buffer_ = DBMS_DEFAULT_BUFFER_SIZE, + UInt64 max_flush_ms_ = 1000); + BinlogClient(const BinlogClient & other) = delete; + ~BinlogClient() = default; + BinlogClient & operator=(const BinlogClient & other) = delete; + + /// Creates a binlog to receive events + BinlogPtr createBinlog(const String & executed_gtid_set = {}, + const String & name = {}, + const NameSet & mysql_database_names = {}, + size_t max_bytes = 0, + UInt64 max_waiting_ms = 0); + + /// The binlog checksum is related to entire connection + void setBinlogChecksum(const String & checksum); + + struct Metadata + { + String binlog_client_name; + std::vector dispatchers; + }; + /// Returns only not empty dispatchers + Metadata getMetadata() const; + +private: + BinlogFactoryPtr factory; + const String binlog_client_name; + UInt64 max_bytes_in_buffer = 0; + UInt64 max_flush_ms = 0; + std::vector dispatchers; + String binlog_checksum; + mutable std::mutex mutex; + Poco::Logger * logger = nullptr; + int dispatchers_count = 0; +}; + +using BinlogClientPtr = std::shared_ptr; + +} diff --git a/src/Databases/MySQL/MySQLBinlogClientFactory.cpp b/src/Databases/MySQL/MySQLBinlogClientFactory.cpp new file mode 100644 index 00000000000..03a777ff352 --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlogClientFactory.cpp @@ -0,0 +1,46 @@ +#include + +namespace DB::MySQLReplication +{ + +BinlogClientFactory & BinlogClientFactory::instance() +{ + static BinlogClientFactory ret; + return ret; +} + +BinlogClientPtr BinlogClientFactory::getClient(const String & host, UInt16 port, const String & user, const String & password, UInt64 max_bytes_in_buffer, UInt64 max_flush_ms) +{ + std::lock_guard lock(mutex); + String binlog_client_name; + WriteBufferFromString stream(binlog_client_name); + stream << user << "@" << host << ":" << port; + stream.finalize(); + String binlog_client_key = binlog_client_name + ":" + password; + auto it = clients.find(binlog_client_key); + BinlogClientPtr ret = it != clients.end() ? it->second.lock() : nullptr; + if (ret) + return ret; + auto factory = std::make_shared(host, port, user, password); + auto client = std::make_shared(factory, binlog_client_name, max_bytes_in_buffer, max_flush_ms); + clients[binlog_client_key] = client; + return client; +} + +std::vector BinlogClientFactory::getMetadata() const +{ + std::lock_guard lock(mutex); + std::vector ret; + for (const auto & it : clients) + { + if (auto c = it.second.lock()) + { + auto metadata = c->getMetadata(); + if (!metadata.dispatchers.empty()) + ret.push_back(metadata); + } + } + return ret; +} + +} diff --git a/src/Databases/MySQL/MySQLBinlogClientFactory.h b/src/Databases/MySQL/MySQLBinlogClientFactory.h new file mode 100644 index 00000000000..544b88e3201 --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlogClientFactory.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include + +namespace DB::MySQLReplication +{ + +/** Global instance to create or reuse MySQL Binlog Clients. + * If a binlog client already exists for specific params, + * it will be returned and reused to read binlog events from MySQL. + * Otherwise new instance will be created. 
+ */ +class BinlogClientFactory final : boost::noncopyable +{ +public: + static BinlogClientFactory & instance(); + + BinlogClientPtr getClient(const String & host, + UInt16 port, + const String & user, + const String & password, + UInt64 max_bytes_in_buffer, + UInt64 max_flush_ms); + + /// Returns info of all registered clients + std::vector getMetadata() const; + +private: + BinlogClientFactory() = default; + + // Keeps track of already destroyed clients + std::unordered_map> clients; + mutable std::mutex mutex; +}; + +} diff --git a/src/Databases/MySQL/MySQLBinlogEventsDispatcher.cpp b/src/Databases/MySQL/MySQLBinlogEventsDispatcher.cpp new file mode 100644 index 00000000000..4af307f9c0f --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlogEventsDispatcher.cpp @@ -0,0 +1,626 @@ +#include "MySQLBinlogEventsDispatcher.h" +#include +#include + +namespace DB::ErrorCodes +{ + extern const int UNKNOWN_EXCEPTION; + extern const int TIMEOUT_EXCEEDED; +} + +namespace DB::MySQLReplication +{ + +class BinlogFromDispatcher : public IBinlog +{ +public: + BinlogFromDispatcher(const String & name_, const NameSet & mysql_database_names_, size_t max_bytes_, UInt64 max_waiting_ms_) + : name(name_) + , mysql_database_names(mysql_database_names_) + , max_bytes(max_bytes_) + , max_waiting_ms(max_waiting_ms_) + , logger(&Poco::Logger::get("BinlogFromDispatcher(" + name + ")")) + { + } + + ~BinlogFromDispatcher() override + { + stop(); + } + + void stop() + { + { + std::lock_guard lock(mutex); + if (is_cancelled) + return; + is_cancelled = true; + } + cv.notify_all(); + } + + std::string getName() const + { + return name; + } + + bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) override; + Position getPosition() const override; + void setPosition(const Position & initial, const Position & wait); + void setException(const std::exception_ptr & exception_); + void push(const BinlogEventsDispatcher::Buffer & buffer); + BinlogEventsDispatcher::BinlogMetadata getBinlogMetadata() const; + +private: + const String name; + const NameSet mysql_database_names; + const size_t max_bytes = 0; + const UInt64 max_waiting_ms = 0; + + Position position; + GTIDSets gtid_sets_wait; + + BinlogEventsDispatcher::Buffer buffer; + mutable std::mutex mutex; + + std::condition_variable cv; + bool is_cancelled = false; + Poco::Logger * logger = nullptr; + std::exception_ptr exception; +}; + +static String getBinlogNames(const std::vector> & binlogs) +{ + std::vector names; + for (const auto & it : binlogs) + { + if (auto binlog = it.lock()) + names.push_back(binlog->getName()); + } + return boost::algorithm::join(names, ", "); +} + +BinlogEventsDispatcher::BinlogEventsDispatcher(const String & logger_name_, size_t max_bytes_in_buffer_, UInt64 max_flush_ms_) + : logger_name(logger_name_) + , max_bytes_in_buffer(max_bytes_in_buffer_) + , max_flush_ms(max_flush_ms_) + , logger(&Poco::Logger::get("BinlogEventsDispatcher(" + logger_name + ")")) + , dispatching_thread(std::make_unique([this]() { dispatchEvents(); })) +{ +} + +BinlogEventsDispatcher::~BinlogEventsDispatcher() +{ + { + std::lock_guard lock(mutex); + is_cancelled = true; + auto exc = std::make_exception_ptr(Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Dispatcher {} has been already destroyed", logger_name)); + try + { + cleanupLocked([&](const auto & binlog) + { + /// Notify the binlogs that the dispatcher is already destroyed + /// and it needs to recreate new binlogs if needed + binlog->setException(exc); + }); + } + catch (const std::exception & exc) + { + LOG_ERROR(logger, 
"Unexpected exception: {}", exc.what()); + } + } + cv.notify_all(); + if (dispatching_thread) + dispatching_thread->join(); +} + +static void flushTimers(Stopwatch & watch, UInt64 & total_time, UInt64 & size, float & size_per_sec, UInt64 & bytes, float & bytes_per_sec, float threshold_flush, float threshold_reset) +{ + total_time += watch.elapsedMicroseconds(); + const float elapsed_seconds = total_time * 1e-6f; + if (elapsed_seconds >= threshold_flush) + { + size_per_sec = size / elapsed_seconds; + bytes_per_sec = bytes / elapsed_seconds; + } + if (elapsed_seconds >= threshold_reset) + { + size = 0; + bytes = 0; + total_time = 0; + } +} + +void BinlogEventsDispatcher::flushBufferLocked() +{ + Stopwatch watch; + if (buffer.bytes) + cleanupLocked([&](const auto & b) { b->push(buffer); }); + events_flush += buffer.events.size(); + bytes_flush += buffer.bytes; + flushTimers(watch, events_flush_total_time, events_flush, events_flush_per_sec, bytes_flush, bytes_flush_per_sec, 0.1f, 1.0); + buffer = {}; +} + +static bool isDispatcherEventIgnored(const BinlogEventPtr & event) +{ + switch (event->header.type) + { + /// Sending to all databases: + case GTID_EVENT: /// Catch up requested executed gtid set, used only in BinlogFromDispatcher + case ROTATE_EVENT: /// Change binlog_checksum + case XID_EVENT: /// Commit transaction + /// Sending to all attached binlogs without filtering on dispatcher thread + /// to keep the connection as up-to-date as possible, + /// but these events should be filtered on databases' threads + /// and sent only to requested databases: + case QUERY_EVENT: /// Apply DDL + case WRITE_ROWS_EVENT_V1: /// Apply DML + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + return false; + default: + break; + } + return true; +} + +void BinlogEventsDispatcher::dispatchEvents() +{ + LOG_TRACE(logger, "{}: started", __FUNCTION__); + BinlogEventPtr event; + BinlogPtr binlog_; + Stopwatch watch; + UInt64 events_read = 0; + UInt64 bytes_read = 0; + UInt64 events_read_total_time = 0; + Stopwatch watch_events_read; + + while (!is_cancelled) + { + try + { + { + std::unique_lock lock(mutex); + cv.wait(lock, [&] { return is_cancelled || (binlog_read_from && !binlogs.empty()); }); + if (is_cancelled) + break; + + for (auto it = sync_to.begin(); it != sync_to.end() && !binlogs.empty();) + { + if (auto d = it->lock()) + { + /// If we can catch up the position of a dispatcher we synced to, + /// need to move all binlogs out + if (trySyncLocked(d)) + { + /// Don't keep connection longer than needed + stopLocked(); + break; + } + ++it; + } + else + { + it = sync_to.erase(it); + } + } + + if (binlog_read_from) + binlog_read_from->setChecksum(binlog_checksum); + binlog_ = binlog_read_from; + if (watch.elapsedMilliseconds() >= max_flush_ms || buffer.bytes >= max_bytes_in_buffer) + { + flushBufferLocked(); + watch.restart(); + } + } + + watch_events_read.restart(); + if (!is_cancelled && binlog_ && binlog_->tryReadEvent(event, max_flush_ms) && event) + { + ++events_read; + bytes_read += event->header.event_size; + { + std::lock_guard lock(mutex); + flushTimers(watch_events_read, events_read_total_time, events_read, events_read_per_sec, bytes_read, bytes_read_per_sec, 1.0, 5.0); + BinlogParser::updatePosition(event, position); + /// Ignore meaningless events + if (isDispatcherEventIgnored(event)) + continue; + buffer.events.push_back(event); + buffer.bytes += event->header.event_size; + buffer.position = 
position; + /// Deliver ROTATE event ASAP if there binlog_checksum should be changed + if (event->header.type == ROTATE_EVENT) + flushBufferLocked(); + } + } + } + catch (const std::exception & exc) + { + std::lock_guard lock(mutex); + LOG_ERROR(logger, "Exception: {}", exc.what()); + stopLocked(); + /// All attached binlogs should be recreated + cleanupLocked([&](const auto & b) { b->setException(std::current_exception()); }); + binlogs.clear(); + buffer = {}; + position = {}; + } + } + LOG_TRACE(logger, "{}: finished", __FUNCTION__); +} + +bool BinlogEventsDispatcher::cleanupLocked(const std::function & binlog)> & fn) +{ + for (auto it = binlogs.begin(); it != binlogs.end();) + { + if (auto binlog = it->lock()) + { + if (fn) + fn(binlog); + ++it; + } + else + { + it = binlogs.erase(it); + } + } + + return binlogs.empty(); +} + +bool BinlogEventsDispatcher::cleanupBinlogsAndStop() +{ + std::lock_guard lock(mutex); + const bool is_empty = cleanupLocked(); + if (is_empty && binlog_read_from) + stopLocked(); + return is_empty; +} + +void BinlogEventsDispatcher::stopLocked() +{ + if (!binlog_read_from) + { + LOG_DEBUG(logger, "Could not stop. Already stopped"); + return; + } + + cleanupLocked(); + binlog_read_from = nullptr; + LOG_DEBUG(logger, "Stopped: {}:{}.{}: ({})", position.binlog_name, position.gtid_sets.toString(), position.binlog_pos, getBinlogNames(binlogs)); +} + +BinlogPtr BinlogEventsDispatcher::createBinlogLocked(const String & name_, + const NameSet & mysql_database_names, + size_t max_bytes, + UInt64 max_waiting_ms, + const Position & pos_initial, + const Position & pos_wait) +{ + static int client_cnt = 0; + const String client_id = !name_.empty() ? name_ : "binlog_" + std::to_string(++client_cnt); + auto binlog = std::make_shared(client_id, mysql_database_names, max_bytes, max_waiting_ms); + binlogs.push_back(binlog); + binlog->setPosition(pos_initial, pos_wait); + LOG_DEBUG(logger, "Created binlog: {} -> {}", name_, binlog->getPosition().gtid_sets.toString()); + return binlog; +} + +BinlogPtr BinlogEventsDispatcher::start(const BinlogPtr & binlog_read_from_, + const String & name_, + const NameSet & mysql_database_names, + size_t max_bytes, + UInt64 max_waiting_ms) +{ + BinlogPtr ret; + { + std::lock_guard lock(mutex); + if (is_started) + return {}; + binlog_read_from = binlog_read_from_; + /// It is used for catching up + /// binlog_read_from should return position with requested executed GTID set: 1-N + position = binlog_read_from->getPosition(); + ret = createBinlogLocked(name_, mysql_database_names, max_bytes, max_waiting_ms, position); + is_started = true; + } + cv.notify_all(); + return ret; +} + +BinlogPtr BinlogEventsDispatcher::attach(const String & executed_gtid_set, + const String & name_, + const NameSet & mysql_database_names, + size_t max_bytes, + UInt64 max_waiting_ms) +{ + BinlogPtr ret; + { + std::lock_guard lock(mutex); + /// Check if binlog_read_from can be reused: + /// Attach to only active dispatchers + /// and if executed_gtid_set is higher value than current + if (!binlog_read_from || !is_started || cleanupLocked() || executed_gtid_set.empty()) + return {}; + Position pos_wait; + pos_wait.gtid_sets.parse(executed_gtid_set); + if (!BinlogParser::isNew(position, pos_wait)) + return {}; + ret = createBinlogLocked(name_, mysql_database_names, max_bytes, max_waiting_ms, position, pos_wait); + } + cv.notify_all(); + return ret; +} + +void BinlogEventsDispatcher::syncToLocked(const BinlogEventsDispatcherPtr & to) +{ + if (to && this != to.get()) + { + 
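+        // Collect the names of dispatchers already in sync_to; used only for the log message below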
std::vector names; + for (const auto & it : sync_to) + { + if (auto dispatcher = it.lock()) + names.push_back(dispatcher->logger_name); + } + LOG_DEBUG(logger, "Syncing -> ({}) + ({})", boost::algorithm::join(names, ", "), to->logger_name); + sync_to.emplace_back(to); + } +} + +void BinlogEventsDispatcher::syncTo(const BinlogEventsDispatcherPtr & to) +{ + std::lock_guard lock(mutex); + syncToLocked(to); +} + +Position BinlogEventsDispatcher::getPosition() const +{ + std::lock_guard lock(mutex); + return position; +} + +bool BinlogEventsDispatcher::trySyncLocked(BinlogEventsDispatcherPtr & to) +{ + { + std::lock_guard lock(to->mutex); + /// Don't catch up if positions do not have GTIDs yet + const auto & cur_sets = position.gtid_sets.sets; + const auto & sets = to->position.gtid_sets.sets; + /// Sync to only started dispatchers + if (!to->binlog_read_from || (cur_sets.empty() && sets.empty()) || to->position != position) + return false; + + flushBufferLocked(); + to->flushBufferLocked(); + LOG_DEBUG(logger, "Synced up: {} -> {}: {}:{}.{}: ({}) + ({})", logger_name, to->logger_name, + position.binlog_name, position.gtid_sets.toString(), position.binlog_pos, getBinlogNames(to->binlogs), getBinlogNames(binlogs)); + std::move(binlogs.begin(), binlogs.end(), std::back_inserter(to->binlogs)); + } + + /// Notify that new binlogs arrived + to->cv.notify_all(); + return true; +} + +void BinlogEventsDispatcher::setBinlogChecksum(const String & checksum) +{ + std::lock_guard lock(mutex); + LOG_DEBUG(logger, "Setting binlog_checksum: {}", checksum); + binlog_checksum = IBinlog::checksumFromString(checksum); +} + +void BinlogFromDispatcher::push(const BinlogEventsDispatcher::Buffer & buffer_) +{ + std::unique_lock lock(mutex); + cv.wait_for(lock, std::chrono::milliseconds(max_waiting_ms), + [&] + { + bool ret = is_cancelled || exception || max_bytes == 0 || buffer.bytes < max_bytes; + if (!ret) + LOG_TRACE(logger, "Waiting: bytes: {} >= {}", buffer.bytes, max_bytes); + return ret; + }); + + if (is_cancelled || exception) + return; + + if (max_bytes != 0 && buffer.bytes >= max_bytes) + { + lock.unlock(); + setException(std::make_exception_ptr( + Exception(ErrorCodes::TIMEOUT_EXCEEDED, + "Timeout exceeded: Waiting: bytes: {} >= {}", buffer.bytes, max_bytes))); + return; + } + + auto it = buffer_.events.begin(); + size_t bytes = buffer_.bytes; + if (!gtid_sets_wait.sets.empty()) + { + if (!buffer_.position.gtid_sets.contains(gtid_sets_wait)) + { + LOG_TRACE(logger, "(wait_until: {} / {}) Skipped bytes: {}", + gtid_sets_wait.toString(), buffer_.position.gtid_sets.toString(), buffer_.bytes); + return; + } + + std::vector seqs; + for (auto & s : gtid_sets_wait.sets) + { + GTID g; + g.uuid = s.uuid; + for (auto & in : s.intervals) + { + g.seq_no = in.end; + seqs.push_back(g); + } + } + for (; it != buffer_.events.end(); ++it) + { + const auto & event = *it; + auto find_if_func = [&](auto & a) + { + return std::static_pointer_cast(event)->gtid == a; + }; + if (event->header.type != GTID_EVENT || std::find_if(seqs.begin(), seqs.end(), find_if_func) == seqs.end()) + { + LOG_TRACE(logger, "(wait_until: {} / {}) Skipped {}", + gtid_sets_wait.toString(), buffer_.position.gtid_sets.toString(), magic_enum::enum_name(event->header.type)); + bytes -= event->header.event_size; + continue; + } + LOG_DEBUG(logger, "(wait_until: {} / {}) Starting {}: gtid seq_no: {}", + gtid_sets_wait.toString(), buffer_.position.gtid_sets.toString(), magic_enum::enum_name(event->header.type), + 
std::static_pointer_cast(event)->gtid.seq_no); + break; + } + gtid_sets_wait = {}; + } + + if (it != buffer_.events.end()) + { + std::copy(it, buffer_.events.end(), std::back_inserter(buffer.events)); + buffer.bytes += bytes; + buffer.position = buffer_.position; + } + lock.unlock(); + /// Notify that added some event + cv.notify_all(); +} + +static void rethrowIfNeeded(const std::exception_ptr & exception, size_t events_size) +{ + try + { + std::rethrow_exception(exception); + } + catch (const Exception & e) + { + /// If timeout exceeded, it is safe to read all events before rethrowning + if (e.code() == ErrorCodes::TIMEOUT_EXCEEDED && events_size > 0) + return; + throw; + } +} + +static bool isBinlogEventIgnored(const NameSet & mysql_database_names, const BinlogEventPtr & event) +{ + bool ret = false; + switch (event->header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + ret = !mysql_database_names.empty() && !mysql_database_names.contains(std::static_pointer_cast(event)->schema); + break; + case QUERY_EVENT: + if (event->type() != MYSQL_UNHANDLED_EVENT) + { + auto query_event = std::static_pointer_cast(event); + ret = !mysql_database_names.empty() && + !query_event->query_database_name.empty() && + !mysql_database_names.contains(query_event->query_database_name); + } + break; + default: + break; + } + return ret; +} + +bool BinlogFromDispatcher::tryReadEvent(BinlogEventPtr & to, UInt64 ms) +{ + auto wake_up_func = [&] + { + if (exception) + rethrowIfNeeded(exception, buffer.events.size()); + return is_cancelled || !buffer.events.empty(); + }; + to = nullptr; + std::unique_lock lock(mutex); + if (!cv.wait_for(lock, std::chrono::milliseconds(ms), wake_up_func) || is_cancelled || buffer.events.empty()) + return false; + to = buffer.events.front(); + buffer.events.pop_front(); + BinlogParser::updatePosition(to, position); + buffer.bytes -= to->header.event_size; + if (isBinlogEventIgnored(mysql_database_names, to)) + to = std::make_shared(EventHeader(to->header)); + lock.unlock(); + /// Notify that removed some event + cv.notify_all(); + return true; +} + +Position BinlogFromDispatcher::getPosition() const +{ + std::lock_guard lock(mutex); + return position; +} + +void BinlogFromDispatcher::setPosition(const Position & initial, const Position & wait) +{ + std::lock_guard lock(mutex); + if (wait.gtid_sets.sets.empty()) + { + position = initial; + } + else + { + position = wait; + gtid_sets_wait = wait.gtid_sets; + } +} + +void BinlogFromDispatcher::setException(const std::exception_ptr & exception_) +{ + { + std::lock_guard lock(mutex); + exception = exception_; + } + cv.notify_all(); +} + +BinlogEventsDispatcher::BinlogMetadata BinlogFromDispatcher::getBinlogMetadata() const +{ + std::lock_guard lock(mutex); + BinlogEventsDispatcher::BinlogMetadata ret; + ret.name = name; + ret.position_write = buffer.position; + ret.position_read = position; + ret.size = buffer.events.size(); + ret.bytes = buffer.bytes; + ret.max_bytes = max_bytes; + ret.max_waiting_ms = max_waiting_ms; + return ret; +} + +BinlogEventsDispatcher::DispatcherMetadata BinlogEventsDispatcher::getDispatcherMetadata() const +{ + std::lock_guard lock(mutex); + DispatcherMetadata ret; + ret.name = logger_name; + ret.position = position; + ret.events_read_per_sec = events_read_per_sec; + ret.bytes_read_per_sec = bytes_read_per_sec; + ret.events_flush_per_sec = events_flush_per_sec; + 
ret.bytes_flush_per_sec = bytes_flush_per_sec; + + for (const auto & it : binlogs) + { + if (auto binlog = it.lock()) + ret.binlogs.push_back(binlog->getBinlogMetadata()); + } + return ret; +} + +} diff --git a/src/Databases/MySQL/MySQLBinlogEventsDispatcher.h b/src/Databases/MySQL/MySQLBinlogEventsDispatcher.h new file mode 100644 index 00000000000..43379697015 --- /dev/null +++ b/src/Databases/MySQL/MySQLBinlogEventsDispatcher.h @@ -0,0 +1,136 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB::MySQLReplication +{ + +class BinlogEventsDispatcher; +using BinlogEventsDispatcherPtr = std::shared_ptr; +class BinlogFromDispatcher; + +/** Reads the binlog events from one source and dispatches them over consumers. + * If it can catch up the position of the another dispatcher, it will move all consumers to this dispatcher. + */ +class BinlogEventsDispatcher final : boost::noncopyable +{ +public: + BinlogEventsDispatcher(const String & logger_name_ = "BinlogDispatcher", size_t max_bytes_in_buffer_ = 1_MiB, UInt64 max_flush_ms_ = 1000); + ~BinlogEventsDispatcher(); + + /// Moves all IBinlog objects to \a to if it has the same position + /// Supports syncing to multiple dispatchers + void syncTo(const BinlogEventsDispatcherPtr & to); + + /** Creates a binlog and starts the dispatching + * binlog_read_from Source binlog to read events from + * name Identifies the binlog, could be not unique + * mysql_database_names Returns events only from these databases + * max_bytes Defines a limit in bytes for this binlog + * Note: Dispatching will be stopped for all binlogs if bytes in queue increases this limit + * max_waiting_ms Max wait time when max_bytes exceeded + */ + BinlogPtr start(const BinlogPtr & binlog_read_from_, + const String & name = {}, + const NameSet & mysql_database_names = {}, + size_t max_bytes = 0, + UInt64 max_waiting_ms = 0); + + /** Creates a binlog if the dispatcher is started + * executed_gtid_set Can be higher value than current, + * otherwise not possible to attach + * name Identifies the binlog, could be not unique + * mysql_database_names Returns events only from these databases + * max_bytes Defines a limit in bytes for this binlog + * max_waiting_ms Max wait time when max_bytes exceeded + */ + BinlogPtr attach(const String & executed_gtid_set, + const String & name = {}, + const NameSet & mysql_database_names = {}, + size_t max_bytes = 0, + UInt64 max_waiting_ms = 0); + + /// Cleans the destroyed binlogs up and returns true if empty + bool cleanupBinlogsAndStop(); + + /// Changes binlog_checksum for binlog_read_from + void setBinlogChecksum(const String & checksum); + + Position getPosition() const; + + struct BinlogMetadata + { + String name; + /// Position that was written to + Position position_write; + /// Position that was read from + Position position_read; + size_t size = 0; + size_t bytes = 0; + size_t max_bytes = 0; + UInt64 max_waiting_ms = 0; + }; + struct DispatcherMetadata + { + String name; + Position position; + float events_read_per_sec = 0; + float bytes_read_per_sec = 0; + float events_flush_per_sec = 0; + float bytes_flush_per_sec = 0; + std::vector binlogs; + }; + DispatcherMetadata getDispatcherMetadata() const; + + struct Buffer + { + std::deque events; + size_t bytes = 0; + Position position; + }; + +private: + bool cleanupLocked(const std::function & binlog)> & fn = {}); + bool startLocked(const String & executed_gtid_set); + void stopLocked(); + BinlogPtr createBinlogLocked(const String & name = {}, + const NameSet & 
mysql_database_names = {}, + size_t max_bytes = 0, + UInt64 max_waiting_ms = 0, + const Position & pos_initial = {}, + const Position & pos_wait = {}); + void syncToLocked(const BinlogEventsDispatcherPtr & to); + bool trySyncLocked(BinlogEventsDispatcherPtr & to); + void flushBufferLocked(); + void dispatchEvents(); + + const String logger_name; + const size_t max_bytes_in_buffer = 0; + const UInt64 max_flush_ms = 0; + Poco::Logger * logger = nullptr; + + BinlogPtr binlog_read_from; + + Position position; + std::vector> sync_to; + std::vector> binlogs; + std::atomic_bool is_cancelled{false}; + mutable std::mutex mutex; + std::condition_variable cv; + std::unique_ptr dispatching_thread; + IBinlog::Checksum binlog_checksum = IBinlog::CRC32; + bool is_started = false; + Buffer buffer; + float events_read_per_sec = 0; + float bytes_read_per_sec = 0; + UInt64 events_flush = 0; + UInt64 events_flush_total_time = 0; + float events_flush_per_sec = 0; + UInt64 bytes_flush = 0; + float bytes_flush_per_sec = 0; +}; + +} diff --git a/src/Databases/MySQL/tests/data/binlog.000016 b/src/Databases/MySQL/tests/data/binlog.000016 new file mode 100644 index 00000000000..e27a2bac9ff Binary files /dev/null and b/src/Databases/MySQL/tests/data/binlog.000016 differ diff --git a/src/Databases/MySQL/tests/data/binlog.001390 b/src/Databases/MySQL/tests/data/binlog.001390 new file mode 100644 index 00000000000..1fbaae08a98 Binary files /dev/null and b/src/Databases/MySQL/tests/data/binlog.001390 differ diff --git a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp new file mode 100644 index 00000000000..df8433f7cce --- /dev/null +++ b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp @@ -0,0 +1,1754 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace DB; +using namespace DB::MySQLReplication; + +#define TRY_LOOP_IMPL(expr, timeout) \ + const unsigned long _test_step = (timeout) < 350 ? (timeout) / 7 + 1 : 50; \ + for (int _i = 0; _i < (timeout) && !(expr); _i += _test_step) \ + std::this_thread::sleep_for(std::chrono::milliseconds(_test_step)); \ + +#define TRY_ASSERT_EQ(expr, expected, timeout) \ +do { \ + TRY_LOOP_IMPL(((expr) == (expected)), timeout) \ + ASSERT_EQ((expr), expected); \ +} while (false) + +#define TRY_ASSERT_TRUE(expr, timeout) \ + TRY_ASSERT_EQ((expr), true, timeout) + +static std::string getTestDataRoot() +{ + static auto root = []() -> std::string + { + std::filesystem::path testdata_path("src/Databases/MySQL/tests/data"); + auto basedir = std::filesystem::current_path(); + while (basedir != basedir.parent_path()) + { + if (std::filesystem::exists(basedir / testdata_path)) + { + testdata_path = basedir / testdata_path; + break; + } + basedir = basedir.parent_path(); + } + auto path = basedir / testdata_path; + return std::filesystem::exists(path) ? 
path.string() : ""; + }(); + return root; +} + +static String getTestDataPath(const String & testdata_file) +{ + return (std::filesystem::path(getTestDataRoot()) / testdata_file).string(); +} + +class MySQLBinlog : public ::testing::Test +{ +protected: + void SetUp() override + { + if (getTestDataRoot().empty()) + GTEST_SKIP() << "Skipping all tests since no test data files found"; + } + + UInt64 timeout = 25000; +}; + +TEST_F(MySQLBinlog, positionEndLogPosOverflow) +{ + Position position; + EventHeader header; + header.event_size = 8161; + header.log_pos = 4294958114; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, header.log_pos); + ASSERT_TRUE(position.binlog_name.empty()); + ASSERT_TRUE(position.gtid_sets.toString().empty()); + ASSERT_EQ(position.timestamp, 0); + + header.log_pos = 4294966149; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, header.log_pos); + UInt64 prev = position.binlog_pos; + + header.log_pos = 7014; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, prev + header.event_size); + prev = position.binlog_pos; + + header.event_size = 8107; + header.log_pos = 15121; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, prev + header.event_size); + prev = position.binlog_pos; + + header.event_size = 8131; + header.log_pos = 23252; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, prev + header.event_size); + + position.binlog_pos = 4724501662; + prev = position.binlog_pos; + + header.event_size = 8125; + header.log_pos = 429542491; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, prev + header.event_size); + + position.binlog_pos = 5474055640; + prev = position.binlog_pos; + + header.event_size = 31; + header.log_pos = 1179088375; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, prev + header.event_size); + + position = {}; + header.log_pos = 4294965445; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, header.log_pos); + prev = position.binlog_pos; + + header.event_size = 7927; + header.log_pos = 6076; + BinlogParser::updatePosition(std::make_shared(EventHeader(header)), position); + ASSERT_EQ(position.binlog_pos, prev + header.event_size); +} + +TEST_F(MySQLBinlog, positionEquals) +{ + Position p1; + Position p2; + ASSERT_EQ(p1, p2); + p1.binlog_pos = 1; + ASSERT_NE(p1, p2); + p2.binlog_pos = 1; + ASSERT_EQ(p1, p2); + p1.gtid_sets.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:87828"); + ASSERT_NE(p1, p2); + p2.gtid_sets.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:87828"); + ASSERT_EQ(p1, p2); + p1.binlog_name = "name"; + ASSERT_NE(p1, p2); + p2.binlog_name = "name"; + ASSERT_EQ(p1, p2); +} + +TEST_F(MySQLBinlog, positionMultimaster) +{ + Position p1; + Position p2; + p1.gtid_sets.parse("f189aee3-3cd2-11ed-a407-fa163ea7d4ed:1-3602,ff9de833-3cd2-11ed-87b7-fa163e99d975:1-172"); + p2.gtid_sets.parse("ff9de833-3cd2-11ed-87b7-fa163e99d975:1-172"); + ASSERT_TRUE(p1.gtid_sets.contains(p2.gtid_sets)); + ASSERT_FALSE(p2.gtid_sets.contains(p1.gtid_sets)); + ASSERT_FALSE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + 
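+    // A GTID range already fully covered by p1 must not be reported as new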
p2.gtid_sets.parse("ff9de833-3cd2-11ed-87b7-fa163e99d975:1-10"); + ASSERT_FALSE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + p2.gtid_sets.parse("ff9de833-3cd2-11ed-87b7-fa163e99d975:172"); + ASSERT_FALSE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + p2.gtid_sets.parse("ff9de833-3cd2-11ed-87b7-fa163e99d975:171-172"); + ASSERT_FALSE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + p2.gtid_sets.parse("ff9de833-3cd2-11ed-87b7-fa163e99d975:171-173"); + ASSERT_TRUE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + p2.gtid_sets.parse("ff9de833-3cd2-11ed-87b7-fa163e99d975:173"); + ASSERT_TRUE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + p2.gtid_sets.parse("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx:173"); + ASSERT_FALSE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + p2.gtid_sets.parse("f189aee3-3cd2-11ed-a407-fa163ea7d4ed:1-3602,ff9de833-3cd2-11ed-87b7-fa163e99d975:1-172"); + ASSERT_FALSE(BinlogParser::isNew(p1, p2)); + + p2.gtid_sets = {}; + p2.gtid_sets.parse("f189aee3-3cd2-11ed-a407-fa163ea7d4ed:1-3602,ff9de833-3cd2-11ed-87b7-fa163e99d975:1-173"); + ASSERT_TRUE(BinlogParser::isNew(p1, p2)); +} + +static void testFile1(IBinlog & binlog, UInt64 timeout, bool filtered = false) +{ + BinlogEventPtr event; + int count = 0; + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, FORMAT_DESCRIPTION_EVENT); + ASSERT_EQ(event->header.timestamp, 1651442421); + ASSERT_EQ(event->header.event_size, 122); + ASSERT_EQ(event->header.log_pos, 126); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, PREVIOUS_GTIDS_EVENT); + ASSERT_EQ(event->header.timestamp, 1651442421); + ASSERT_EQ(event->header.event_size, 71); + ASSERT_EQ(event->header.log_pos, 197); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651475081); + ASSERT_EQ(event->header.event_size, 79); + ASSERT_EQ(event->header.log_pos, 276); + + auto gtid_event = std::static_pointer_cast(event); + ASSERT_TRUE(gtid_event); + ASSERT_EQ(gtid_event->commit_flag, 0); + GTIDSets gtid_expected; + gtid_expected.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:87828"); + GTIDSets gtid_actual; + gtid_actual.update(gtid_event->gtid); + ASSERT_EQ(gtid_actual.toString(), gtid_expected.toString()); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(event->header.timestamp, 1651475081); + ASSERT_EQ(event->header.event_size, 73); + ASSERT_EQ(event->header.log_pos, 349); + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, TABLE_MAP_EVENT); + ASSERT_EQ(event->header.timestamp, 1651475081); + ASSERT_EQ(event->header.event_size, 48); + ASSERT_EQ(event->header.log_pos, 397); + + auto table_event = std::static_pointer_cast(event); + ASSERT_TRUE(table_event); + ASSERT_EQ(table_event->table_id, 7566); + ASSERT_EQ(table_event->flags, 1); + ASSERT_EQ(table_event->schema_len, 2u); + ASSERT_EQ(table_event->schema, "db"); + ASSERT_EQ(table_event->table_len, 1u); + ASSERT_EQ(table_event->table, "a"); + ASSERT_EQ(table_event->column_count, 4); + std::vector column_type = {3u, 3u, 3u, 3u}; + ASSERT_EQ(table_event->column_type, column_type); + std::vector column_meta = {0, 0, 0, 0}; + 
ASSERT_EQ(table_event->column_meta, column_meta); + std::vector column_charset = {}; + ASSERT_EQ(table_event->column_charset, column_charset); + ASSERT_EQ(table_event->default_charset, 255u); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, WRITE_ROWS_EVENT_V2); + ASSERT_EQ(event->header.timestamp, 1651475081); + ASSERT_EQ(event->header.event_size, 52); + ASSERT_EQ(event->header.log_pos, 449); + + ASSERT_EQ(event->type(), MYSQL_UNPARSED_ROWS_EVENT); + event = std::static_pointer_cast(event)->parse(); + + ASSERT_TRUE(event); + auto write_event = std::static_pointer_cast(event); + ASSERT_TRUE(write_event); + ASSERT_EQ(write_event->number_columns, 4); + ASSERT_EQ(write_event->schema, "db"); + ASSERT_EQ(write_event->table, "a"); + ASSERT_EQ(write_event->rows.size(), 1); + ASSERT_EQ(write_event->rows[0].getType(), Field::Types::Tuple); + auto row_data = write_event->rows[0].get(); + ASSERT_EQ(row_data.size(), 4u); + ASSERT_EQ(row_data[0].get(), 1u); + ASSERT_EQ(row_data[1].get(), 1u); + ASSERT_EQ(row_data[2].get(), 1u); + ASSERT_EQ(row_data[3].get(), 1u); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, XID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651475081); + ASSERT_EQ(event->header.event_size, 31); + ASSERT_EQ(event->header.log_pos, 480); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651475244); + ASSERT_EQ(event->header.event_size, 79); + ASSERT_EQ(event->header.log_pos, 559); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(event->header.timestamp, 1651475244); + ASSERT_EQ(event->header.event_size, 82); + ASSERT_EQ(event->header.log_pos, 641); + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, TABLE_MAP_EVENT); + ASSERT_EQ(event->header.timestamp, 1651475244); + ASSERT_EQ(event->header.event_size, 48); + ASSERT_EQ(event->header.log_pos, 689); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.timestamp, 1651475244); + ASSERT_EQ(event->header.event_size, 70); + ASSERT_EQ(event->header.type, UPDATE_ROWS_EVENT_V2); + ASSERT_EQ(event->header.log_pos, 759); + + ASSERT_EQ(event->type(), MYSQL_UNPARSED_ROWS_EVENT); + event = std::static_pointer_cast(event)->parse(); + + ASSERT_TRUE(event); + auto update_event = std::static_pointer_cast(event); + ASSERT_TRUE(update_event); + ASSERT_EQ(update_event->number_columns, 4); + ASSERT_EQ(update_event->schema, "db"); + ASSERT_EQ(update_event->table, "a"); + ASSERT_EQ(update_event->rows.size(), 2); + ASSERT_EQ(update_event->rows[0].getType(), Field::Types::Tuple); + row_data = update_event->rows[0].get(); + ASSERT_EQ(row_data.size(), 4u); + ASSERT_EQ(row_data[0].get(), 1u); + ASSERT_EQ(row_data[1].get(), 1u); + ASSERT_EQ(row_data[2].get(), 1u); + ASSERT_EQ(row_data[3].get(), 1u); + row_data = update_event->rows[1].get(); + ASSERT_EQ(row_data.size(), 4u); + ASSERT_EQ(row_data[0].get(), 1u); + ASSERT_EQ(row_data[1].get(), 2u); + ASSERT_EQ(row_data[2].get(), 1u); + ASSERT_EQ(row_data[3].get(), 1u); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, XID_EVENT); + 
ASSERT_EQ(event->header.timestamp, 1651475244); + ASSERT_EQ(event->header.event_size, 31); + ASSERT_EQ(event->header.log_pos, 790); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651482394); + ASSERT_EQ(event->header.event_size, 79); + ASSERT_EQ(event->header.log_pos, 869); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(event->header.timestamp, 1651482394); + ASSERT_EQ(event->header.event_size, 82); + ASSERT_EQ(event->header.log_pos, 951); + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, TABLE_MAP_EVENT); + ASSERT_EQ(event->header.timestamp, 1651482394); + ASSERT_EQ(event->header.event_size, 48); + ASSERT_EQ(event->header.log_pos, 999); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, UPDATE_ROWS_EVENT_V2); + ASSERT_EQ(event->header.timestamp, 1651482394); + ASSERT_EQ(event->header.event_size, 70); + ASSERT_EQ(event->header.log_pos, 1069); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, XID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651482394); + ASSERT_EQ(event->header.event_size, 31); + ASSERT_EQ(event->header.log_pos, 1100); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651483072); + ASSERT_EQ(event->header.event_size, 79); + ASSERT_EQ(event->header.log_pos, 1179); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(event->header.timestamp, 1651483072); + ASSERT_EQ(event->header.event_size, 82); + ASSERT_EQ(event->header.log_pos, 1261); + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, TABLE_MAP_EVENT); + ASSERT_EQ(event->header.timestamp, 1651483072); + ASSERT_EQ(event->header.event_size, 48); + ASSERT_EQ(event->header.log_pos, 1309); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, UPDATE_ROWS_EVENT_V2); + ASSERT_EQ(event->header.timestamp, 1651483072); + ASSERT_EQ(event->header.event_size, 70); + ASSERT_EQ(event->header.log_pos, 1379); + + ASSERT_EQ(binlog.getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:87828-87830"); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, XID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651483072); + ASSERT_EQ(event->header.event_size, 31); + ASSERT_EQ(event->header.log_pos, 1410); + + ASSERT_EQ(binlog.getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:87828-87831"); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.timestamp, 1651483336); + ASSERT_EQ(event->header.event_size, 79); + ASSERT_EQ(event->header.log_pos, 1489); + gtid_event = std::static_pointer_cast(event); + ASSERT_TRUE(gtid_event); + ASSERT_EQ(gtid_event->commit_flag, 0); + gtid_expected = {}; + 
gtid_expected.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:87832"); + gtid_actual = {}; + gtid_actual.update(gtid_event->gtid); + ASSERT_EQ(gtid_actual.toString(), gtid_expected.toString()); + + ASSERT_EQ(binlog.getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:87828-87831"); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(event->header.timestamp, 1651483336); + ASSERT_EQ(event->header.event_size, 82); + ASSERT_EQ(event->header.log_pos, 1571); + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, TABLE_MAP_EVENT); + ASSERT_EQ(event->header.timestamp, 1651483336); + ASSERT_EQ(event->header.event_size, 48); + ASSERT_EQ(event->header.log_pos, 1619); + } + + int total_count = filtered ? 37 : 48; + for (; count < total_count; ++count) + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_EQ(event->header.timestamp, 1651528821); + ASSERT_EQ(event->header.event_size, 44); + ASSERT_EQ(event->header.log_pos, 3091); + ASSERT_EQ(count, total_count); + ASSERT_FALSE(binlog.tryReadEvent(event, 10)); + + auto position = binlog.getPosition(); + ASSERT_EQ(position.binlog_pos, 4); + ASSERT_EQ(position.binlog_name, "binlog.001391"); + ASSERT_EQ(position.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:87828-87836"); +} + +TEST_F(MySQLBinlog, binlogFromFile1) +{ + BinlogFromFile binlog; + binlog.open(getTestDataPath("binlog.001390")); + testFile1(binlog, timeout); +} + +TEST_F(MySQLBinlog, binlogFromFactory1) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto binlog = f->createBinlog(""); + + testFile1(*binlog, timeout); +} + +TEST_F(MySQLBinlog, binlogFromFactory1ExecutedGtidSet) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + BinlogEventPtr event; + + auto binlog = f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87828"); + + ASSERT_TRUE(binlog->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.log_pos, 559); + + auto gtid_event = std::static_pointer_cast(event); + ASSERT_TRUE(gtid_event); + GTIDSets gtid_expected; + gtid_expected.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:87829"); + GTIDSets gtid_actual; + gtid_actual.update(gtid_event->gtid); + ASSERT_EQ(gtid_actual.toString(), gtid_expected.toString()); + + for (int count = 8; count < 48; ++count) + ASSERT_TRUE(binlog->tryReadEvent(event, timeout)); + + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + auto position = binlog->getPosition(); + ASSERT_EQ(position.binlog_pos, 4); + ASSERT_EQ(position.binlog_name, "binlog.001391"); + ASSERT_EQ(position.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87836"); + ASSERT_FALSE(binlog->tryReadEvent(event, 10)); + + binlog = f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87829"); + + ASSERT_TRUE(binlog->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.log_pos, 869); + + gtid_event = std::static_pointer_cast(event); + ASSERT_TRUE(gtid_event); + gtid_expected = {}; + gtid_expected.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:87830"); + gtid_actual = {}; + gtid_actual.update(gtid_event->gtid); + ASSERT_EQ(gtid_actual.toString(), gtid_expected.toString()); + + for (int count = 13; count < 48; 
++count) + ASSERT_TRUE(binlog->tryReadEvent(event, timeout)); + + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + position = binlog->getPosition(); + ASSERT_EQ(position.binlog_pos, 4); + ASSERT_EQ(position.binlog_name, "binlog.001391"); + ASSERT_EQ(position.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87836"); + ASSERT_FALSE(binlog->tryReadEvent(event, 10)); + + binlog = f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87834"); + + ASSERT_TRUE(binlog->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.log_pos, 2443); + + gtid_event = std::static_pointer_cast(event); + ASSERT_TRUE(gtid_event); + gtid_expected = {}; + gtid_expected.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:87835"); + gtid_actual = {}; + gtid_actual.update(gtid_event->gtid); + ASSERT_EQ(gtid_actual.toString(), gtid_expected.toString()); + + for (int count = 38; count < 48; ++count) + ASSERT_TRUE(binlog->tryReadEvent(event, timeout)); + + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + position = binlog->getPosition(); + ASSERT_EQ(position.binlog_pos, 4); + ASSERT_EQ(position.binlog_name, "binlog.001391"); + ASSERT_EQ(position.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87836"); + ASSERT_FALSE(binlog->tryReadEvent(event, 10)); +} + +TEST_F(MySQLBinlog, binlogFromDispatcher1) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + BinlogEventsDispatcher d; + auto b = d.start(f->createBinlog("")); + testFile1(*b, timeout, true); + ASSERT_EQ(d.getPosition().gtid_sets.toString(), b->getPosition().gtid_sets.toString()); +} + +static void testFile2(IBinlog & binlog, UInt64 timeout, bool filtered = false) +{ + BinlogEventPtr event; + int count = 0; + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, FORMAT_DESCRIPTION_EVENT); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, PREVIOUS_GTIDS_EVENT); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + + if (!filtered) + { + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, TABLE_MAP_EVENT); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, UPDATE_ROWS_EVENT_V2); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, XID_EVENT); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.log_pos, 608); + + auto gtid_event = std::static_pointer_cast(event); + ASSERT_TRUE(gtid_event); + ASSERT_EQ(gtid_event->commit_flag, 0); + GTIDSets gtid_expected; + gtid_expected.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:1059"); + GTIDSets gtid_actual; + gtid_actual.update(gtid_event->gtid); + ASSERT_EQ(gtid_actual.toString(), gtid_expected.toString()); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(event->header.log_pos, 701); + + if (!filtered) + { + 
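+        // TABLE_MAP events are filtered out by the dispatcher, so they are seen only when reading the binlog file directly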
ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, TABLE_MAP_EVENT); + ASSERT_EQ(event->header.log_pos, 760); + } + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, UPDATE_ROWS_EVENT_V2); + ASSERT_EQ(event->header.log_pos, 830); + + ASSERT_EQ(event->type(), MYSQL_UNPARSED_ROWS_EVENT); + event = std::static_pointer_cast(event)->parse(); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, XID_EVENT); + ASSERT_EQ(event->header.log_pos, 861); + + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + ++count; + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.log_pos, 940); + gtid_event = std::static_pointer_cast(event); + ASSERT_TRUE(gtid_event); + ASSERT_EQ(gtid_event->commit_flag, 0); + gtid_expected = {}; + gtid_expected.parse("a9d88f83-c14e-11ec-bb36-244bfedf7766:1060"); + gtid_actual = {}; + gtid_actual.update(gtid_event->gtid); + ASSERT_EQ(gtid_actual.toString(), gtid_expected.toString()); + + int total_count = filtered ? 13 : 18; + for (; count < total_count; ++count) + ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); + + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_EQ(event->header.log_pos, 1237); + ASSERT_EQ(count, total_count); + ASSERT_FALSE(binlog.tryReadEvent(event, 10)); + + auto position = binlog.getPosition(); + ASSERT_EQ(position.binlog_pos, 4); + ASSERT_EQ(position.binlog_name, "binlog.000017"); + ASSERT_EQ(binlog.getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); +} + +TEST_F(MySQLBinlog, binlogFromFile2) +{ + BinlogFromFile binlog; + binlog.open(getTestDataPath("binlog.000016")); + testFile2(binlog, timeout); +} + +TEST_F(MySQLBinlog, binlogFromDispatcher2) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + BinlogEventsDispatcher d; + auto b = d.start(f->createBinlog("")); + testFile2(*b, timeout, true); + ASSERT_EQ(d.getPosition().gtid_sets.toString(), b->getPosition().gtid_sets.toString()); +} + +TEST_F(MySQLBinlog, binlogsFromOneFile) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + auto b1 = d1->start(f->createBinlog("")); + auto b2 = d2->start(f->createBinlog("")); + + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), b2->getPosition().gtid_sets.toString()); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(b1->getPosition().binlog_pos, b2->getPosition().binlog_pos); + ASSERT_EQ(b1->getPosition().binlog_pos, 4); +} + +TEST_F(MySQLBinlog, empty) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + ASSERT_TRUE(d1->getDispatcherMetadata().binlogs.empty()); +} + +TEST_F(MySQLBinlog, binlogsAfterStart) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + + auto b1 = d1->start(f->createBinlog("")); + auto b2 = d1->start(f->createBinlog("")); + ASSERT_FALSE(b2); + + testFile2(*b1, timeout, true); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); +} + +TEST_F(MySQLBinlog, metadata) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + 
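+    // Before start() is called, the dispatcher reports no attached binlogs and an empty position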
ASSERT_TRUE(d1->getDispatcherMetadata().binlogs.empty()); + ASSERT_EQ(d1->getDispatcherMetadata().name, "d1"); + ASSERT_TRUE(d1->getDispatcherMetadata().position.gtid_sets.sets.empty()); + + auto b1 = d1->start(f->createBinlog("")); + ASSERT_TRUE(b1); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 1); + ASSERT_FALSE(d1->start(f->createBinlog(""))); + + TRY_ASSERT_TRUE(!d1->getDispatcherMetadata().position.gtid_sets.sets.empty(), timeout); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 1); + + testFile2(*b1, timeout, true); + + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 1); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_write.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].size, 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].bytes, 0); +} + +TEST_F(MySQLBinlog, catchingUp) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + ASSERT_TRUE(d1->getDispatcherMetadata().binlogs.empty()); + ASSERT_TRUE(d2->getDispatcherMetadata().binlogs.empty()); + + d2->syncTo(d1); + + auto b1 = d1->start(f->createBinlog("")); + auto b2 = d2->start(f->createBinlog("")); + ASSERT_TRUE(b1); + ASSERT_TRUE(b2); + TRY_ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2, timeout); + ASSERT_FALSE(d1->getDispatcherMetadata().position.gtid_sets.sets.empty()); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs.size(), 0); + ASSERT_FALSE(d2->getDispatcherMetadata().position.gtid_sets.sets.empty()); + ASSERT_FALSE(d2->start(f->createBinlog(""))); + + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), b2->getPosition().gtid_sets.toString()); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(b1->getPosition().binlog_pos, b2->getPosition().binlog_pos); + ASSERT_EQ(b1->getPosition().binlog_pos, 4); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs.size(), 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_write.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_read.binlog_pos, 4); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_write.binlog_pos, 4); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].size, 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].bytes, 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_write.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_read.binlog_pos, 4); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_write.binlog_pos, 4); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].size, 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].bytes, 0); +} + +TEST_F(MySQLBinlog, 
catchingUpFastMaster) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + + d2->syncTo(d1); + + auto b1 = d1->start(f->createBinlog("")); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + auto b2 = d2->start(f->createBinlog("")); + + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), b2->getPosition().gtid_sets.toString()); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(b1->getPosition().binlog_pos, b2->getPosition().binlog_pos); + ASSERT_EQ(b1->getPosition().binlog_pos, 4); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs.size(), 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2); +} + +TEST_F(MySQLBinlog, catchingUpFastSlave) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + + d2->syncTo(d1); + + auto b2 = d2->start(f->createBinlog("")); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + auto b1 = d1->start(f->createBinlog("")); + + TRY_ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2, timeout); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs.size(), 0); + ASSERT_FALSE(d1->getDispatcherMetadata().position.gtid_sets.sets.empty()); + ASSERT_FALSE(d2->getDispatcherMetadata().position.gtid_sets.sets.empty()); + + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), b2->getPosition().gtid_sets.toString()); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(b1->getPosition().binlog_pos, b2->getPosition().binlog_pos); + ASSERT_EQ(b1->getPosition().binlog_pos, 4); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs.size(), 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2); +} + +TEST_F(MySQLBinlog, catchingUpWithoutWaiting) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + + d2->syncTo(d1); + + auto b1 = d1->start(f->createBinlog("")); + auto b2 = d2->start(f->createBinlog("")); + + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), b2->getPosition().gtid_sets.toString()); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(b1->getPosition().binlog_pos, b2->getPosition().binlog_pos); + ASSERT_EQ(b1->getPosition().binlog_pos, 4); + TRY_ASSERT_EQ(d2->getDispatcherMetadata().binlogs.size(), 0, timeout); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_read.binlog_pos, 4); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_read.binlog_pos, 4); +} + +TEST_F(MySQLBinlog, catchingUpManyToOne) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d0 = std::make_shared("d0"); + std::vector ds; + int n = 10; + for (int i = 0; i < n; ++i) + { + auto d = std::make_shared("r" + std::to_string(i)); + d->syncTo(d0); + ds.push_back(d); + } + 
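+    // All ten dispatchers sync to d0; once they catch up, their binlogs must migrate to d0 and they become empty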
+ for (int i = 0; i < n; ++i) + ASSERT_TRUE(ds[i]->getDispatcherMetadata().binlogs.empty()); + + auto b0 = d0->start(f->createBinlog(""), "b"); + ASSERT_EQ(d0->getDispatcherMetadata().binlogs.size(), 1); + ASSERT_EQ(d0->getDispatcherMetadata().binlogs[0].position_read.binlog_pos, 0); + std::vector bs; + bs.resize(n); + for (int i = 0; i < n; ++i) + bs[i] = ds[i]->start(f->createBinlog(""), "b" + std::to_string(i)); + + TRY_ASSERT_EQ(d0->getDispatcherMetadata().binlogs.size(), n + 1, timeout); + ASSERT_FALSE(d0->getDispatcherMetadata().position.gtid_sets.sets.empty()); + for (int i = 0; i < n; ++i) + { + ASSERT_EQ(ds[i]->getDispatcherMetadata().binlogs.size(), 0); + ASSERT_FALSE(ds[i]->getDispatcherMetadata().position.gtid_sets.sets.empty()); + } + + testFile2(*b0, timeout, true); + for (int i = 0; i < n; ++i) + testFile2(*bs[i], timeout, true); + + ASSERT_EQ(b0->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(b0->getPosition().binlog_pos, 4); + + for (int i = 0; i < n; ++i) + { + ASSERT_EQ(bs[i]->getPosition().gtid_sets.toString(), b0->getPosition().gtid_sets.toString()); + ASSERT_EQ(bs[i]->getPosition().binlog_pos, b0->getPosition().binlog_pos); + } + + for (int i = 0; i < n; ++i) + ASSERT_EQ(ds[i]->getDispatcherMetadata().binlogs.size(), 0); + + ASSERT_EQ(d0->getDispatcherMetadata().binlogs.size(), n + 1); + for (int i = 0; i < n + 1; ++i) + { + ASSERT_EQ(d0->getDispatcherMetadata().binlogs[i].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d0->getDispatcherMetadata().binlogs[i].position_write.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(d0->getDispatcherMetadata().binlogs[i].position_read.binlog_pos, 4); + ASSERT_EQ(d0->getDispatcherMetadata().binlogs[i].position_write.binlog_pos, 4); + ASSERT_EQ(d0->getDispatcherMetadata().binlogs[i].size, 0); + ASSERT_EQ(d0->getDispatcherMetadata().binlogs[i].bytes, 0); + } +} + +TEST_F(MySQLBinlog, catchingUpStopApplier) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + + d2->syncTo(d1); + + auto b1 = d1->start(f->createBinlog("")); + ASSERT_TRUE(b1); + d1 = nullptr; + + auto b2 = d2->start(f->createBinlog("")); + ASSERT_TRUE(b2); + testFile2(*b2, timeout, true); + ASSERT_EQ(b2->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); +} + +TEST_F(MySQLBinlog, catchingUpOneToAllPrevious) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + std::vector ds; + int n = 10; + for (int i = 0; i < n; ++i) + { + auto d = std::make_shared("d" + std::to_string(i)); + for (int j = 0; j < i; ++j) + d->syncTo(ds[j]); + ds.push_back(d); + } + + for (int i = 0; i < n; ++i) + ASSERT_TRUE(ds[i]->getDispatcherMetadata().binlogs.empty()); + + std::vector bs; + bs.resize(n); + for (int i = 0; i < n; ++i) + bs[i] = ds[i]->start(f->createBinlog(""), "b" + std::to_string(i)); + + auto check_dispatchers = [&] + { + int not_empty_count = 0; + int ii = 0; + for (int i = 0; i < n; ++i) + { + if (!ds[i]->getDispatcherMetadata().binlogs.empty()) + { + ++not_empty_count; + ii = i; + } + } + return not_empty_count == 1 && ds[ii]->getDispatcherMetadata().binlogs.size() == n; + }; + + for (int i = 0; i < n; ++i) + testFile2(*bs[i], timeout, true); + + TRY_ASSERT_TRUE(check_dispatchers(), timeout); + + for (int i = 1; i < n; ++i) + { + ASSERT_EQ(bs[i]->getPosition().gtid_sets.toString(), 
bs[0]->getPosition().gtid_sets.toString()); + ASSERT_EQ(bs[i]->getPosition().binlog_pos, bs[0]->getPosition().binlog_pos); + } + + int i = 0; + for (int j = 0; j < n; ++j) + { + auto bs_ = ds[j]->getDispatcherMetadata().binlogs; + for (; i < bs_.size(); ++i) + { + ASSERT_EQ(bs_[i].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + ASSERT_EQ(bs_[i].position_write.gtid_sets.toString(), bs_[i].position_write.gtid_sets.toString()); + ASSERT_EQ(bs_[i].position_read.binlog_pos, 4); + ASSERT_EQ(bs_[i].position_write.binlog_pos, 4); + ASSERT_EQ(bs_[i].size, 0); + ASSERT_EQ(bs_[i].bytes, 0); + } + } + ASSERT_EQ(i, n); +} + +TEST_F(MySQLBinlog, catchingUpMaxBytes) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + + d2->syncTo(d1); + + auto b1 = d1->start(f->createBinlog(""), "big"); + auto b2 = d2->start(f->createBinlog(""), "small", {}, 1, 10000); + + testFile2(*b2, timeout, true); + TRY_ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2, timeout); + ASSERT_EQ(d1->getDispatcherMetadata().position.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1058-1060"); + testFile2(*b1, timeout, true); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_write.gtid_sets.toString(), d1->getDispatcherMetadata().position.gtid_sets.toString()); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].position_read.gtid_sets.toString(), d1->getDispatcherMetadata().position.gtid_sets.toString()); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_write.gtid_sets.toString(), d1->getDispatcherMetadata().position.gtid_sets.toString()); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].position_read.gtid_sets.toString(), d1->getDispatcherMetadata().position.gtid_sets.toString()); +} + +TEST_F(MySQLBinlog, filterEvents) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + auto b1 = d1->start(f->createBinlog(""), "b1", {"db"}); + auto b2 = d2->start(f->createBinlog(""), "b2", {"unknown_database"}); + + BinlogEventPtr event; + for (int i = 0; i < 37; ++i) + { + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + switch (event->header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + if (event->type() == MYSQL_UNPARSED_ROWS_EVENT) + { + ASSERT_EQ(std::static_pointer_cast(event)->schema, "db"); + } + break; + default: + break; + } + } + + ASSERT_FALSE(b1->tryReadEvent(event, 0)); + ASSERT_FALSE(event); + + for (int i = 0; i < 37; ++i) + { + ASSERT_TRUE(b2->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + switch (event->header.type) + { + case ROTATE_EVENT: + case XID_EVENT: + case QUERY_EVENT: + case GTID_EVENT: + break; + default: + if (event->type() != MYSQL_UNHANDLED_EVENT) + FAIL() << "Unexpected event: " << magic_enum::enum_name(event->header.type); + break; + } + } + + ASSERT_FALSE(b2->tryReadEvent(event, 0)); + ASSERT_FALSE(event); + + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:87828-87836"); + ASSERT_EQ(b1->getPosition().binlog_pos, 4); + ASSERT_EQ(b2->getPosition().gtid_sets.toString(), b1->getPosition().gtid_sets.toString()); + ASSERT_EQ(b2->getPosition().binlog_pos, b1->getPosition().binlog_pos); + ASSERT_FALSE(b2->tryReadEvent(event, 0)); +} + +TEST_F(MySQLBinlog, 
filterEventsMultipleDatabases) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + auto d3 = std::make_shared("d3"); + auto d4 = std::make_shared("d4"); + auto d5 = std::make_shared("d5"); + auto all_dbs = d1->start(f->createBinlog(""), "all_dbs"); + auto db = d2->start(f->createBinlog(""), "db", {"db"}); + auto aborted = d3->start(f->createBinlog(""), "aborted_full_sync", {"aborted_full_sync"}); + auto db_and_aborted = d4->start(f->createBinlog(""), "db_and_aborted", {"db", "aborted_full_sync"}); + auto unknown = d5->start(f->createBinlog(""), "unknown", {"unknown1", "unknown2"}); + + BinlogEventPtr event; + for (int i = 0; i < 37; ++i) + { + ASSERT_TRUE(all_dbs->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + switch (event->header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + ASSERT_EQ(event->type(), MYSQL_UNPARSED_ROWS_EVENT); + break; + default: + break; + } + } + + ASSERT_FALSE(all_dbs->tryReadEvent(event, 0)); + ASSERT_FALSE(event); + + for (int i = 0; i < 37; ++i) + { + ASSERT_TRUE(db->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + switch (event->header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + if (event->type() == MYSQL_UNPARSED_ROWS_EVENT) + { + ASSERT_EQ(std::static_pointer_cast(event)->schema, "db"); + } + break; + default: + break; + } + } + + ASSERT_FALSE(db->tryReadEvent(event, 0)); + ASSERT_FALSE(event); + + for (int i = 0; i < 37; ++i) + { + ASSERT_TRUE(aborted->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + switch (event->header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + if (event->type() == MYSQL_UNPARSED_ROWS_EVENT) + { + ASSERT_EQ(std::static_pointer_cast(event)->schema, "aborted_full_sync"); + } + break; + default: + break; + } + } + + ASSERT_FALSE(aborted->tryReadEvent(event, 0)); + ASSERT_FALSE(event); + + for (int i = 0; i < 37; ++i) + { + ASSERT_TRUE(db_and_aborted->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + switch (event->header.type) + { + case WRITE_ROWS_EVENT_V1: + case WRITE_ROWS_EVENT_V2: + case DELETE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT_V2: + case UPDATE_ROWS_EVENT_V1: + case UPDATE_ROWS_EVENT_V2: + { + ASSERT_EQ(event->type(), MYSQL_UNPARSED_ROWS_EVENT); + auto schema = std::static_pointer_cast(event)->schema; + ASSERT_TRUE(schema == "db" || schema == "aborted_full_sync"); + } break; + default: + break; + } + } + + ASSERT_FALSE(db_and_aborted->tryReadEvent(event, 0)); + ASSERT_FALSE(event); + + for (int i = 0; i < 37; ++i) + { + ASSERT_TRUE(unknown->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + switch (event->header.type) + { + case ROTATE_EVENT: + case XID_EVENT: + case QUERY_EVENT: + case GTID_EVENT: + break; + default: + ASSERT_EQ(event->type(), MYSQL_UNHANDLED_EVENT); + break; + } + } + + ASSERT_FALSE(unknown->tryReadEvent(event, 0)); + ASSERT_FALSE(event); +} + +TEST_F(MySQLBinlog, dispatcherStop) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto b1 = d1->start(f->createBinlog("")); + ASSERT_TRUE(b1); + d1 = nullptr; + BinlogEventPtr event; + 
EXPECT_THROW(for (int i = 0; i < 18 + 1; ++i) b1->tryReadEvent(event, timeout), DB::Exception); +} + +TEST_F(MySQLBinlog, executedGTIDSet) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto d1 = std::make_shared("d1"); + auto b1 = d1->start(f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-1058"), "b1"); + + BinlogEventPtr event; + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(event->header.log_pos, 608); + + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(event->header.log_pos, 701); + + for (int i = 0; i < 7; ++i) + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_EQ(event->header.log_pos, 1237); + ASSERT_EQ(d1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-1060"); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-1060"); + ASSERT_FALSE(b1->tryReadEvent(event, 0)); +} + +TEST_F(MySQLBinlog, client) +{ + auto f = std::make_shared(getTestDataPath("binlog.000016")); + auto e = std::make_shared(f); + + auto b1 = e->createBinlog("", "b1"); + auto b2 = e->createBinlog("", "b2"); + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + + auto b3 = e->createBinlog("", "b3"); + + testFile2(*b3, timeout, true); + + b1 = nullptr; + b2 = nullptr; + + auto b4 = e->createBinlog("", "b4"); + testFile2(*b4, timeout, true); + + b3 = nullptr; + b4 = e->createBinlog("", "b4 2"); + testFile2(*b4, timeout, true); + + b1 = e->createBinlog("", "b1 2"); + b2 = e->createBinlog("", "b2 2"); + testFile2(*b1, timeout, true); + + b3 = e->createBinlog("", "b3 2"); + testFile2(*b2, timeout, true); + + b4 = e->createBinlog("", "b4 3"); + testFile2(*b3, timeout, true); + testFile2(*b4, timeout, true); + + b1 = nullptr; + b2 = nullptr; + b3 = nullptr; + b4 = nullptr; + b1 = e->createBinlog("", "b1 3"); + b2 = e->createBinlog("", "b2 3"); + b3 = e->createBinlog("", "b3 3"); + b4 = e->createBinlog("", "b4 4"); + testFile2(*b4, timeout, true); + testFile2(*b3, timeout, true); + testFile2(*b2, timeout, true); + testFile2(*b1, timeout, true); + + f = std::make_shared(getTestDataPath("binlog.000016")); + e = std::make_shared(f); + + b4 = e->createBinlog("", "b4 5"); + b3 = e->createBinlog("", "b3 4"); + testFile2(*b4, timeout, true); + b2 = e->createBinlog("", "b2 4"); + b1 = e->createBinlog("", "b1 4"); + testFile2(*b3, timeout, true); + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + + b1 = e->createBinlog("", "b1 5"); + b2 = e->createBinlog("", "b2 5"); + testFile2(*b1, timeout, true); + testFile2(*b2, timeout, true); + b1 = e->createBinlog("", "b1 6"); + testFile2(*b1, timeout, true); + b1 = e->createBinlog("", "b1 7"); + testFile2(*b1, timeout, true); + + b3 = nullptr; + b4 = nullptr; + b1 = e->createBinlog("", "b1 8"); + b4 = e->createBinlog("", "b4 6"); + b3 = e->createBinlog("", "b3 5"); + testFile2(*b4, timeout, true); + testFile2(*b3, timeout, true); + testFile2(*b1, timeout, true); + + b2 = nullptr; + b3 = nullptr; + b4 = nullptr; + b1 = nullptr; + b1 = e->createBinlog("", "b1 9"); + testFile2(*b1, timeout, true); +} + +TEST_F(MySQLBinlog, createBinlog) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto d1 = std::make_shared("d1"); + auto b1 = d1->start(f->createBinlog(""), "b1"); + ASSERT_TRUE(b1); + 
ASSERT_FALSE(d1->start(f->createBinlog(""))); + testFile1(*b1, timeout, true); + ASSERT_FALSE(d1->start(f->createBinlog(""))); + b1 = nullptr; + ASSERT_FALSE(d1->start(f->createBinlog(""))); +} + +TEST_F(MySQLBinlog, createBinlogAttach1) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto d1 = std::make_shared("d1"); + auto b1_ = d1->start(f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87828"), "b1_"); + ASSERT_TRUE(b1_); + auto b1 = d1->attach("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87831", "b1"); + if (b1) + { + BinlogEventPtr event; + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87831"); + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87831"); + + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87831"); + + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, UPDATE_ROWS_EVENT_V2); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87831"); + + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, XID_EVENT); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87832"); + for (int i = 0; i < 17; ++i) + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_FALSE(b1->tryReadEvent(event, 10)); + ASSERT_EQ(b1->getPosition().binlog_pos, 4); + ASSERT_EQ(b1->getPosition().binlog_name, "binlog.001391"); + ASSERT_EQ(b1->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87836"); + for (int i = 0; i < 33; ++i) + ASSERT_TRUE(b1_->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs.size(), 2); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[0].bytes, 0); + ASSERT_EQ(d1->getDispatcherMetadata().binlogs[1].bytes, 0); + } +} + +TEST_F(MySQLBinlog, createBinlogAttach2) +{ + BinlogEventPtr event; + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto d1 = std::make_shared("d1"); + auto d2 = std::make_shared("d2"); + auto d3 = std::make_shared("d3"); + auto d4 = std::make_shared("d4"); + + auto b1 = d1->start(f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87828"), "b1"); + ASSERT_TRUE(b1); + ASSERT_TRUE(b1->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + + auto b2_ = d2->start(f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87828"), "b2_"); + ASSERT_TRUE(b2_); + auto b2 = d2->attach("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87831", "b2"); + + auto b3_ = d3->start(f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87828"), "b3_"); + ASSERT_TRUE(b3_); + auto b3 = d3->attach("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87835", "b3"); + + auto b4_ = d4->start(f->createBinlog("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87828"), "b4_"); + ASSERT_TRUE(b4_); + auto b4 = d4->attach("a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87828", "b4"); + + /// There is a race with dispatcher thread + if (b2) + { + ASSERT_TRUE(b2->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + 
ASSERT_EQ(event->header.type, GTID_EVENT); + + ASSERT_TRUE(b2->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + + ASSERT_TRUE(b2->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, UPDATE_ROWS_EVENT_V2); + for (int i = 0; i < 18; ++i) + ASSERT_TRUE(b2->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_FALSE(b2->tryReadEvent(event, 10)); + ASSERT_EQ(b2->getPosition().binlog_pos, 4); + ASSERT_EQ(b2->getPosition().binlog_name, "binlog.001391"); + ASSERT_EQ(b2->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87836"); + for (int i = 0; i < 33; ++i) + ASSERT_TRUE(b2_->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs.size(), 2); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs[0].bytes, 0); + ASSERT_EQ(d2->getDispatcherMetadata().binlogs[1].bytes, 0); + } + + if (b4) + { + ASSERT_TRUE(b4->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + + ASSERT_TRUE(b4->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + + ASSERT_TRUE(b4->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, WRITE_ROWS_EVENT_V2); + for (int i = 0; i < 10; ++i) + ASSERT_TRUE(b4->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_FALSE(b2->tryReadEvent(event, 10)); + ASSERT_EQ(b4->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87836"); + for (int i = 0; i < 33; ++i) + ASSERT_TRUE(b4_->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_EQ(d4->getDispatcherMetadata().binlogs.size(), 2); + ASSERT_EQ(d4->getDispatcherMetadata().binlogs[0].bytes, 0); + ASSERT_EQ(d4->getDispatcherMetadata().binlogs[1].bytes, 0); + } + + if (b3) + { + ASSERT_TRUE(b3->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, GTID_EVENT); + + ASSERT_TRUE(b3->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, QUERY_EVENT); + for (int i = 0; i < 3; ++i) + ASSERT_TRUE(b3->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_FALSE(b3->tryReadEvent(event, 10)); + ASSERT_EQ(b3->getPosition().gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:1-87836"); + for (int i = 0; i < 33; ++i) + ASSERT_TRUE(b3_->tryReadEvent(event, timeout)); + ASSERT_TRUE(event); + ASSERT_EQ(event->header.type, ROTATE_EVENT); + ASSERT_EQ(d3->getDispatcherMetadata().binlogs.size(), 2); + ASSERT_EQ(d3->getDispatcherMetadata().binlogs[0].bytes, 0); + ASSERT_EQ(d3->getDispatcherMetadata().binlogs[1].bytes, 0); + } +} + +TEST_F(MySQLBinlog, factoryThreads) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto func1 = [&] + { + auto b1 = f->createBinlog(""); + auto b2 = f->createBinlog(""); + auto b3 = f->createBinlog(""); + testFile1(*b1, timeout); + testFile1(*b2, timeout); + b2 = f->createBinlog(""); + testFile1(*b2, timeout); + b1 = f->createBinlog(""); + testFile1(*b1, timeout); + b1 = nullptr; + b2 = f->createBinlog(""); + testFile1(*b2, timeout); + b1 = f->createBinlog(""); + testFile1(*b1, timeout); + testFile1(*b3, timeout); + }; + + auto func2 = [&] + { + auto b1 = f->createBinlog(""); + auto b2 = 
f->createBinlog(""); + testFile1(*b2, timeout); + testFile1(*b1, timeout); + b1 = f->createBinlog(""); + testFile1(*b1, timeout); + b2 = f->createBinlog(""); + testFile1(*b2, timeout); + b1 = f->createBinlog(""); + b2 = f->createBinlog(""); + testFile1(*b1, timeout); + b2 = nullptr; + b1 = f->createBinlog(""); + testFile1(*b1, timeout); + b1 = nullptr; + }; + + int n = 4; + std::vector ts1, ts2; + for (int i = 0; i < n; ++i) + { + ts1.emplace_back(std::thread(func1)); + ts2.emplace_back(std::thread(func2)); + } + for (int i = 0; i < n; ++i) + { + ts1[i].join(); + ts2[i].join(); + } +} + +TEST_F(MySQLBinlog, clientThreads) +{ + auto f = std::make_shared(getTestDataPath("binlog.001390")); + auto e = std::make_shared(f); + auto func1 = [&] + { + auto b1 = e->createBinlog(""); + auto b2 = e->createBinlog(""); + testFile1(*b1, timeout, true); + testFile1(*b2, timeout, true); + b1 = nullptr; + b2 = nullptr; + b2 = e->createBinlog(""); + testFile1(*b2, timeout, true); + b1 = e->createBinlog(""); + testFile1(*b1, timeout, true); + b1 = nullptr; + b2 = e->createBinlog(""); + testFile1(*b2, timeout, true); + b2 = nullptr; + b1 = e->createBinlog(""); + testFile1(*b1, timeout, true); + }; + + auto func2 = [&] + { + auto b1 = e->createBinlog(""); + testFile1(*b1, timeout, true); + auto b2 = e->createBinlog(""); + testFile1(*b2, timeout, true); + b2 = e->createBinlog(""); + b1 = e->createBinlog(""); + testFile1(*b1, timeout, true); + testFile1(*b2, timeout, true); + b1 = nullptr; + b2 = nullptr; + b1 = e->createBinlog(""); + testFile1(*b1, timeout, true); + b2 = e->createBinlog(""); + testFile1(*b2, timeout, true); + }; + + int n = 4; + std::vector ts1, ts2; + for (int i = 0; i < n; ++i) + { + ts1.emplace_back(std::thread(func1)); + ts2.emplace_back(std::thread(func2)); + } + for (int i = 0; i < n; ++i) + { + ts1[i].join(); + ts2[i].join(); + } + + // All dispatchers synced and finished + // No dispatchers and no binlogs are alive here + ASSERT_EQ(e->getMetadata().dispatchers.size(), 0); + + // Creates new dispatcher + auto b1 = e->createBinlog("", "b1 1"); + testFile1(*b1, timeout, true); + + auto md = e->getMetadata().dispatchers; + ASSERT_EQ(md.size(), 1); + ASSERT_EQ(md[0].position.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:87828-87836"); + ASSERT_EQ(md[0].binlogs.size(), 1); + ASSERT_EQ(md[0].binlogs[0].position_read.gtid_sets.toString(), "a9d88f83-c14e-11ec-bb36-244bfedf7766:87828-87836"); + ASSERT_EQ(md[0].binlogs[0].size, 0); + ASSERT_EQ(md[0].binlogs[0].bytes, 0); + + // Creates new dispatcher + auto b1_2 = e->createBinlog("", "b1 2"); + + // Should sync to the first dispatcher + TRY_ASSERT_EQ(e->getMetadata().dispatchers.size(), 1, timeout); + // If there is no CPU available, + // it possible to catch in the middle of the transform between dispatchers. + // Checking again to make sure that catching up is finished. 
+ TRY_ASSERT_EQ(e->getMetadata().dispatchers.size(), 1, timeout); + b1 = nullptr; + md = e->getMetadata().dispatchers; + ASSERT_EQ(md.size(), 1); + ASSERT_EQ(md[0].binlogs.size(), 1); + // Did not read any events yet + ASSERT_EQ(md[0].binlogs[0].position_read.gtid_sets.toString(), ""); + ASSERT_EQ(md[0].binlogs[0].position_read.binlog_pos, 0); + + auto b2 = e->createBinlog("", "b2"); + + BinlogEventPtr event; + // Read only one event + ASSERT_TRUE(b2->tryReadEvent(event, timeout)); + // Waits before all binlogs are moved to main dispatcher + TRY_ASSERT_EQ(e->getMetadata().dispatchers[0].binlogs.size(), 2, timeout); + + // One dispatcher is alive + md = e->getMetadata().dispatchers; + ASSERT_EQ(md.size(), 1); + ASSERT_EQ(md[0].binlogs.size(), 2); + ASSERT_EQ(md[0].binlogs[0].position_read.gtid_sets.toString(), ""); + ASSERT_EQ(md[0].binlogs[1].position_read.gtid_sets.toString(), ""); + ASSERT_EQ(md[0].binlogs[0].position_read.binlog_pos, md[0].binlogs[0].name == "b2" ? 276 : 0); // Read one event + ASSERT_EQ(md[0].binlogs[1].position_read.binlog_pos, md[0].binlogs[0].name == "b2" ? 0 : 276); +} diff --git a/src/Disks/DiskType.cpp b/src/Disks/DiskType.cpp index aa18cc6e0cb..59e242c7fe0 100644 --- a/src/Disks/DiskType.cpp +++ b/src/Disks/DiskType.cpp @@ -3,6 +3,30 @@ namespace DB { +String toString(DataSourceType data_source_type) +{ + switch (data_source_type) + { + case DataSourceType::Local: + return "local"; + case DataSourceType::RAM: + return "memory"; + case DataSourceType::S3: + return "s3"; + case DataSourceType::S3_Plain: + return "s3_plain"; + case DataSourceType::HDFS: + return "hdfs"; + case DataSourceType::WebServer: + return "web"; + case DataSourceType::AzureBlobStorage: + return "azure_blob_storage"; + case DataSourceType::LocalBlobStorage: + return "local_blob_storage"; + } + std::unreachable; +} + bool DataSourceDescription::operator==(const DataSourceDescription & other) const { return std::tie(type, description, is_encrypted) == std::tie(other.type, other.description, other.is_encrypted); diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 7d47fa8da78..82a00ccb3cc 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -18,29 +18,7 @@ enum class DataSourceType LocalBlobStorage, }; -inline String toString(DataSourceType data_source_type) -{ - switch (data_source_type) - { - case DataSourceType::Local: - return "local"; - case DataSourceType::RAM: - return "memory"; - case DataSourceType::S3: - return "s3"; - case DataSourceType::S3_Plain: - return "s3_plain"; - case DataSourceType::HDFS: - return "hdfs"; - case DataSourceType::WebServer: - return "web"; - case DataSourceType::AzureBlobStorage: - return "azure_blob_storage"; - case DataSourceType::LocalBlobStorage: - return "local_blob_storage"; - } - UNREACHABLE(); -} +String toString(DataSourceType data_source_type); struct DataSourceDescription { diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index 6075b385a6c..0e5bd64b155 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -3,19 +3,11 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - using namespace Azure::Storage::Blobs; diff 
--git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index c3baf3fdbda..6962248c7e1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -258,12 +258,6 @@ String DiskObjectStorage::getUniqueId(const String & path) const bool DiskObjectStorage::checkUniqueId(const String & id) const { - if (!id.starts_with(object_key_prefix)) - { - LOG_DEBUG(log, "Blob with id {} doesn't start with blob storage prefix {}, Stack {}", id, object_key_prefix, StackTrace().toString()); - return false; - } - auto object = StoredObject(id); return object_storage->exists(object); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index acc1591f8a9..a664f11fab7 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -5,15 +5,7 @@ #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include namespace CurrentMetrics { diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 3271a190193..881f7a46c16 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) { readIntText(version, buf); + assertChar('\n', buf); if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_FULL_OBJECT_KEY) throw Exception( @@ -27,8 +28,6 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", metadata_file_path, toString(version), toString(VERSION_FULL_OBJECT_KEY)); - assertChar('\n', buf); - UInt32 keys_count; readIntText(keys_count, buf); assertChar('\t', buf); @@ -122,6 +121,7 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const chassert(write_version >= VERSION_ABSOLUTE_PATHS && write_version <= VERSION_FULL_OBJECT_KEY); writeIntText(write_version, buf); + writeChar('\n', buf); writeIntText(keys_with_meta.size(), buf); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index beb8a400632..6a091471888 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -19,7 +19,6 @@ #include -#include #include #include #include @@ -556,27 +555,12 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( return std::make_unique( std::move(new_client), std::move(new_s3_settings), version_id, s3_capabilities, new_namespace, - endpoint, object_key_prefix, disk_name); + endpoint, key_generator, disk_name); } -ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string &) const +ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const { - /// Path to store the new S3 object. - - /// Total length is 32 a-z characters for enough randomness. - /// First 3 characters are used as a prefix for - /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/ - - constexpr size_t key_name_total_size = 32; - constexpr size_t key_name_prefix_size = 3; - - /// Path to store new S3 object. 
- String key = fmt::format("{}/{}", - getRandomASCIIString(key_name_prefix_size), - getRandomASCIIString(key_name_total_size - key_name_prefix_size)); - - /// what ever key_prefix value is, consider that key as relative - return ObjectStorageKey::createAsRelative(object_key_prefix, key); + return key_generator->generate(path); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index c8b3aeaca28..caa4beaba3b 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -39,7 +40,6 @@ struct S3ObjectStorageSettings bool read_only; }; - class S3ObjectStorage : public IObjectStorage { private: @@ -53,10 +53,10 @@ private: const S3Capabilities & s3_capabilities_, String bucket_, String connection_string, - String object_key_prefix_, + ObjectStorageKeysGeneratorPtr key_generator_, const String & disk_name_) : bucket(std::move(bucket_)) - , object_key_prefix(std::move(object_key_prefix_)) + , key_generator(std::move(key_generator_)) , disk_name(disk_name_) , client(std::move(client_)) , s3_settings(std::move(s3_settings_)) @@ -179,7 +179,7 @@ private: private: std::string bucket; - String object_key_prefix; + ObjectStorageKeysGeneratorPtr key_generator; std::string disk_name; MultiVersion client; @@ -199,11 +199,6 @@ private: class S3PlainObjectStorage : public S3ObjectStorage { public: - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override - { - return ObjectStorageKey::createAsRelative(object_key_prefix, path); - } - std::string getName() const override { return "S3PlainObjectStorage"; } template diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 7543fb94331..a35a1eb2a82 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -91,6 +91,60 @@ private: } }; +std::pair getPrefixAndKeyGenerator( + String type, const S3::URI & uri, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + if (type == "s3_plain") + return {uri.key, createObjectStorageKeysGeneratorAsIsWithPrefix(uri.key)}; + + chassert(type == "s3"); + + bool storage_metadata_write_full_object_key = DiskObjectStorageMetadata::getWriteFullObjectKeySetting(); + bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); + + if (send_metadata && storage_metadata_write_full_object_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Wrong configuration in {}. " + "s3 does not supports feature 'send_metadata' with feature 'storage_metadata_write_full_object_key'.", + config_prefix); + + String object_key_compatibility_prefix = config.getString(config_prefix + ".key_compatibility_prefix", String()); + String object_key_template = config.getString(config_prefix + ".key_template", String()); + + if (object_key_template.empty()) + { + if (!object_key_compatibility_prefix.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Wrong configuration in {}. " + "Setting 'key_compatibility_prefix' can be defined only with setting 'key_template'.", + config_prefix); + + return {uri.key, createObjectStorageKeysGeneratorByPrefix(uri.key)}; + } + + if (send_metadata) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Wrong configuration in {}. 
" + "s3 does not supports send_metadata with setting 'key_template'.", + config_prefix); + + if (!storage_metadata_write_full_object_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Wrong configuration in {}. " + "Feature 'storage_metadata_write_full_object_key' has to be enabled in order to use setting 'key_template'.", + config_prefix); + + if (!uri.key.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Wrong configuration in {}. " + "URI.key is forbidden with settings 'key_template', use setting 'key_compatibility_prefix' instead'. " + "URI.key: '{}', bucket: '{}'. ", + config_prefix, + uri.key, uri.bucket); + + return {object_key_compatibility_prefix, createObjectStorageKeysGeneratorByTemplate(object_key_template)}; +} + } void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) @@ -104,7 +158,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); S3::URI uri(endpoint); - if (!uri.key.ends_with('/')) + // an empty key remains empty + if (!uri.key.empty() && !uri.key.ends_with('/')) uri.key.push_back('/'); S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); @@ -113,6 +168,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) String type = config.getString(config_prefix + ".type"); chassert(type == "s3" || type == "s3_plain"); + auto [object_key_compatibility_prefix, object_key_generator] = getPrefixAndKeyGenerator(type, uri, config, config_prefix); + MetadataStoragePtr metadata_storage; auto settings = getSettings(config, config_prefix, context); auto client = getClient(config, config_prefix, context, *settings); @@ -128,20 +185,18 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) throw Exception(ErrorCodes::BAD_ARGUMENTS, "s3_plain does not supports send_metadata"); s3_storage = std::make_shared( - std::move(client), std::move(settings), - uri.version_id, s3_capabilities, - uri.bucket, uri.endpoint, uri.key, name); - metadata_storage = std::make_shared(s3_storage, uri.key); + std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, object_key_generator, name); + + metadata_storage = std::make_shared(s3_storage, object_key_compatibility_prefix); } else { s3_storage = std::make_shared( - std::move(client), std::move(settings), - uri.version_id, s3_capabilities, - uri.bucket, uri.endpoint, uri.key, name); + std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, object_key_generator, name); auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); - metadata_storage = std::make_shared(metadata_disk, uri.key); + + metadata_storage = std::make_shared(metadata_disk, object_key_compatibility_prefix); } /// NOTE: should we still perform this check for clickhouse-disks? @@ -164,7 +219,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) DiskObjectStoragePtr s3disk = std::make_shared( name, - uri.key, + uri.key, /// might be empty type == "s3" ? 
"DiskS3" : "DiskS3Plain", std::move(metadata_storage), std::move(s3_storage), diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index b61b6140b0c..587858ce774 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -100,6 +100,7 @@ TEST_F(DiskEncryptedTest, WriteAndRead) { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); + buf->finalize(); } /// Now we have one file. @@ -130,6 +131,7 @@ TEST_F(DiskEncryptedTest, Append) { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{"Some text"}, *buf); + buf->finalize(); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 9); @@ -140,6 +142,7 @@ TEST_F(DiskEncryptedTest, Append) { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{" Another text"}, *buf); + buf->finalize(); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 22); @@ -156,6 +159,7 @@ TEST_F(DiskEncryptedTest, Truncate) { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{"Some text"}, *buf); + buf->finalize(); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 9); @@ -185,6 +189,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize) /// Write nothing to a file. { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); + buf->finalize(); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); @@ -194,6 +199,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize) /// Append the file with nothing. { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); + buf->finalize(); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); @@ -219,6 +225,7 @@ TEST_F(DiskEncryptedTest, AnotherFolder) { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); + buf->finalize(); } /// Now we have one file. @@ -239,10 +246,13 @@ TEST_F(DiskEncryptedTest, RandomIV) { auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); + buf->finalize(); } + { auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); + buf->finalize(); } /// Now we have two files. 
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 89676594581..a06e898b7c5 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids) list (APPEND PRIVATE_LIBS ch_contrib::sqids) endif() +if (TARGET ch_contrib::idna) + list (APPEND PRIVATE_LIBS ch_contrib::idna) +endif() + if (TARGET ch_contrib::h3) list (APPEND PRIVATE_LIBS ch_contrib::h3) endif() diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index dd843daed8c..2219708be49 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -538,8 +538,8 @@ struct ToStartOfInterval else if (scale_multiplier > 1000000) { Int64 scale_diff = scale_multiplier / static_cast(1000000); - if (t >= 0) [[likely]] - return t / microseconds / scale_diff * microseconds; + if (t >= 0) [[likely]] /// When we divide the `t` value we should round the result + return (t / microseconds + scale_diff / 2) / scale_diff * microseconds; else return ((t + 1) / microseconds / scale_diff - 1) * microseconds; } @@ -581,8 +581,8 @@ struct ToStartOfInterval else if (scale_multiplier > 1000) { Int64 scale_diff = scale_multiplier / static_cast(1000); - if (t >= 0) [[likely]] - return t / milliseconds / scale_diff * milliseconds; + if (t >= 0) [[likely]] /// When we divide the `t` value we should round the result + return (t / milliseconds + scale_diff / 2) / scale_diff * milliseconds; else return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; } diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index a5e819179d6..6cb23bbea9f 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -8,6 +8,8 @@ #include #include +#include + #ifdef __SSE4_2__ # include #endif @@ -25,7 +27,7 @@ struct FunctionStringDistanceImpl { using ResultType = typename Op::ResultType; - static void constantConstant(const std::string & haystack, const std::string & needle, ResultType & res) + static void constantConstant(const String & haystack, const String & needle, ResultType & res) { res = Op::process(haystack.data(), haystack.size(), needle.data(), needle.size()); } @@ -51,7 +53,7 @@ struct FunctionStringDistanceImpl } static void constantVector( - const std::string & haystack, + const String & haystack, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, PaddedPODArray & res) @@ -70,7 +72,7 @@ struct FunctionStringDistanceImpl static void vectorConstant( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - const std::string & needle, + const String & needle, PaddedPODArray & res) { constantVector(needle, data, offsets, res); @@ -81,7 +83,7 @@ struct FunctionStringDistanceImpl struct ByteHammingDistanceImpl { using ResultType = UInt64; - static ResultType inline process( + static ResultType process( const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size) { UInt64 res = 0; @@ -115,7 +117,7 @@ template struct ByteJaccardIndexImpl { using ResultType = Float64; - static ResultType inline process( + static ResultType process( const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size) { if (haystack_size == 0 || needle_size == 0) @@ -222,23 +224,23 @@ struct ByteJaccardIndexImpl } }; +static constexpr size_t max_string_size = 1u << 16; + struct ByteEditDistanceImpl { using ResultType = 
UInt64; - static constexpr size_t max_string_size = 1u << 16; - static ResultType inline process( + static ResultType process( const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size) { if (haystack_size == 0 || needle_size == 0) return haystack_size + needle_size; - /// Safety threshold against DoS, since we use two array to calculate the distance. + /// Safety threshold against DoS, since we use two arrays to calculate the distance. if (haystack_size > max_string_size || needle_size > max_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, - "The string size is too big for function editDistance, " - "should be at most {}", max_string_size); + "The string size is too big for function editDistance, should be at most {}", max_string_size); PaddedPODArray distances0(haystack_size + 1, 0); PaddedPODArray distances1(haystack_size + 1, 0); @@ -271,6 +273,180 @@ struct ByteEditDistanceImpl } }; +struct ByteDamerauLevenshteinDistanceImpl +{ + using ResultType = UInt64; + + static ResultType process( + const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size) + { + /// Safety threshold against DoS + if (haystack_size > max_string_size || needle_size > max_string_size) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "The string size is too big for function damerauLevenshteinDistance, should be at most {}", max_string_size); + + /// Shortcuts: + + if (haystack_size == 0) + return needle_size; + + if (needle_size == 0) + return haystack_size; + + if (haystack_size == needle_size && memcmp(haystack, needle, haystack_size) == 0) + return 0; + + /// Implements the algorithm for optimal string alignment distance from + /// https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance + + /// Dynamically allocate memory for the 2D array + /// Allocating a 2D array, for convenience starts is an array of pointers to the start of the rows. + std::vector d((needle_size + 1) * (haystack_size + 1)); + std::vector starts(haystack_size + 1); + + /// Setting the pointers in starts to the beginning of (needle_size + 1)-long intervals. + /// Also initialize the row values based on the mentioned algorithm. + for (size_t i = 0; i <= haystack_size; ++i) + { + starts[i] = d.data() + (needle_size + 1) * i; + starts[i][0] = static_cast(i); + } + + for (size_t j = 0; j <= needle_size; ++j) + { + starts[0][j] = static_cast(j); + } + + for (size_t i = 1; i <= haystack_size; ++i) + { + for (size_t j = 1; j <= needle_size; ++j) + { + int cost = (haystack[i - 1] == needle[j - 1]) ? 
0 : 1; + starts[i][j] = std::min(starts[i - 1][j] + 1, /// deletion + std::min(starts[i][j - 1] + 1, /// insertion + starts[i - 1][j - 1] + cost) /// substitution + ); + if (i > 1 && j > 1 && haystack[i - 1] == needle[j - 2] && haystack[i - 2] == needle[j - 1]) + starts[i][j] = std::min(starts[i][j], starts[i - 2][j - 2] + 1); /// transposition + } + } + + return starts[haystack_size][needle_size]; + } +}; + +struct ByteJaroSimilarityImpl +{ + using ResultType = Float64; + + static ResultType process( + const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size) + { + /// Safety threshold against DoS + if (haystack_size > max_string_size || needle_size > max_string_size) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "The string size is too big for function jaroSimilarity, should be at most {}", max_string_size); + + /// Shortcuts: + + if (haystack_size == 0) + return needle_size; + + if (needle_size == 0) + return haystack_size; + + if (haystack_size == needle_size && memcmp(haystack, needle, haystack_size) == 0) + return 1.0; + + const int s1len = static_cast(haystack_size); + const int s2len = static_cast(needle_size); + + /// Window size to search for matches in the other string + const int max_range = std::max(0, std::max(s1len, s2len) / 2 - 1); + std::vector s1_matching(s1len, -1); + std::vector s2_matching(s2len, -1); + + /// Calculate matching characters + size_t matching_characters = 0; + for (int i = 0; i < s1len; i++) + { + /// Matching window + const int min_index = std::max(i - max_range, 0); + const int max_index = std::min(i + max_range + 1, s2len); + for (int j = min_index; j < max_index; j++) + { + if (s2_matching[j] == -1 && haystack[i] == needle[j]) + { + s1_matching[i] = i; + s2_matching[j] = j; + matching_characters++; + break; + } + } + } + + if (matching_characters == 0) + return 0.0; + + /// Transpositions (one-way only) + double transpositions = 0.0; + for (size_t i = 0, s1i = 0, s2i = 0; i < matching_characters; i++) + { + while (s1_matching[s1i] == -1) + s1i++; + while (s2_matching[s2i] == -1) + s2i++; + if (haystack[s1i] != needle[s2i]) + transpositions += 0.5; + s1i++; + s2i++; + } + + double m = static_cast(matching_characters); + double jaro_similarity = 1.0 / 3.0 * (m / static_cast(s1len) + + m / static_cast(s2len) + + (m - transpositions) / m); + return jaro_similarity; + } +}; + +struct ByteJaroWinklerSimilarityImpl +{ + using ResultType = Float64; + + static ResultType process( + const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size) + { + static constexpr int max_prefix_length = 4; + static constexpr double scaling_factor = 0.1; + static constexpr double boost_threshold = 0.7; + + /// Safety threshold against DoS + if (haystack_size > max_string_size || needle_size > max_string_size) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "The string size is too big for function jaroWinklerSimilarity, should be at most {}", max_string_size); + + const int s1len = static_cast(haystack_size); + const int s2len = static_cast(needle_size); + + ResultType jaro_winkler_similarity = ByteJaroSimilarityImpl::process(haystack, haystack_size, needle, needle_size); + + if (jaro_winkler_similarity > boost_threshold) + { + const int common_length = std::min(max_prefix_length, std::min(s1len, s2len)); + int common_prefix = 0; + while (common_prefix < common_length && haystack[common_prefix] == needle[common_prefix]) + common_prefix++; + + 
jaro_winkler_similarity += common_prefix * scaling_factor * (1.0 - jaro_winkler_similarity); + } + return jaro_winkler_similarity; + } +}; + struct NameByteHammingDistance { static constexpr auto name = "byteHammingDistance"; @@ -283,6 +459,12 @@ struct NameEditDistance }; using FunctionEditDistance = FunctionsStringSimilarity, NameEditDistance>; +struct NameDamerauLevenshteinDistance +{ + static constexpr auto name = "damerauLevenshteinDistance"; +}; +using FunctionDamerauLevenshteinDistance = FunctionsStringSimilarity, NameDamerauLevenshteinDistance>; + struct NameJaccardIndex { static constexpr auto name = "stringJaccardIndex"; @@ -295,6 +477,18 @@ struct NameJaccardIndexUTF8 }; using FunctionStringJaccardIndexUTF8 = FunctionsStringSimilarity>, NameJaccardIndexUTF8>; +struct NameJaroSimilarity +{ + static constexpr auto name = "jaroSimilarity"; +}; +using FunctionJaroSimilarity = FunctionsStringSimilarity, NameJaroSimilarity>; + +struct NameJaroWinklerSimilarity +{ + static constexpr auto name = "jaroWinklerSimilarity"; +}; +using FunctionJaroWinklerSimilarity = FunctionsStringSimilarity, NameJaroWinklerSimilarity>; + REGISTER_FUNCTION(StringDistance) { factory.registerFunction( @@ -305,9 +499,18 @@ REGISTER_FUNCTION(StringDistance) FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"}); factory.registerAlias("levenshteinDistance", NameEditDistance::name); + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the Damerau-Levenshtein distance two between two byte-string.)"}); + factory.registerFunction( - FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.)"}); + FunctionDocumentation{.description = R"(Calculates the Jaccard similarity index between two byte strings.)"}); factory.registerFunction( - FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two UTF8 strings.)"}); + FunctionDocumentation{.description = R"(Calculates the Jaccard similarity index between two UTF8 strings.)"}); + + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the Jaro similarity between two byte-string.)"}); + + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the Jaro-Winkler similarity between two byte-string.)"}); } } diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 26f4b8000ec..ecb4de97dc2 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -2,17 +2,9 @@ #include #include +#include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 51f2ef659cd..cea407aee02 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -1,23 +1,25 @@ -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include +#include namespace DB @@ -46,7 +48,7 @@ class 
FunctionArrayElement : public IFunction { public: static constexpr auto name = "arrayElement"; - static FunctionPtr create(ContextPtr context); + static FunctionPtr create(ContextPtr context_); String getName() const override; @@ -63,16 +65,49 @@ private: ArrayImpl::NullMapBuilder & builder, size_t input_rows_count) const; template - static ColumnPtr executeNumberConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); + static ColumnPtr executeNumberConst( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const Field & index, + ArrayImpl::NullMapBuilder & builder); template - static ColumnPtr executeNumber(const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); + static ColumnPtr executeNumber( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const PaddedPODArray & indices, + ArrayImpl::NullMapBuilder & builder); + + /// Optimize arrayElement when first argument has type Array(T) or Array(Nullable(T)) in which T is number type + template + static ColumnPtr executeArrayNumberConst( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const Field & index, + ArrayImpl::NullMapBuilder & builder); + + /// Optimize arrayElement when first argument has type Array(T) or Array(Nullable(T)) in which T is number type + template + static ColumnPtr executeArrayNumber( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const PaddedPODArray & indices, + ArrayImpl::NullMapBuilder & builder); static ColumnPtr executeStringConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); template static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); + /// Optimize arrayElement when first argument has type Array(String) or Array(Nullable(String)) + static ColumnPtr + executeArrayStringConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); + + /// Optimize arrayElement when first argument has type Array(String) or Array(Nullable(String)) + template + static ColumnPtr executeArrayString( + const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); + static ColumnPtr executeGenericConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); template @@ -91,6 +126,10 @@ private: */ ColumnPtr executeTuple(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const; + /** For a map array, the function is evaluated component-wise for its keys and values + */ + ColumnPtr executeMap2(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const; + /** For a map the function finds the matched value for a key. * Currently implemented just as linear search in array. * However, optimizations are possible. 
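
The declarations added in the hunk above introduce fast paths for `arrayElement` when the first argument is an array of arrays (numeric or string elements, possibly Nullable), plus a component-wise path for arrays of maps. They all follow the usual `arrayElement` index convention: indices are 1-based, negative indices count from the end, and an out-of-range index yields an empty array. A minimal sketch of that convention on plain `std::vector` (purely illustrative; the real implementations work on ClickHouse's flattened `ColumnArray` offsets) is:

```cpp
#include <cassert>
#include <cstdint>
#include <cstddef>
#include <vector>

/// Standalone illustration of the index convention used by arrayElement and by
/// the new Array(Array(T)) fast paths declared above: indices are 1-based,
/// negative indices count from the end, anything out of range yields a
/// default-constructed element (an empty array here). This is only a sketch of
/// the semantics, not of the column layout.
std::vector<int> arrayElementSketch(const std::vector<std::vector<int>> & arr, int64_t index)
{
    const auto size = static_cast<int64_t>(arr.size());
    int64_t adjusted; /// zero-based position inside arr
    if (index > 0 && index <= size)
        adjusted = index - 1;       /// arr[1] is the first element
    else if (index < 0 && -index <= size)
        adjusted = size + index;    /// arr[-1] is the last element
    else
        return {};                  /// out of range -> empty array

    return arr[static_cast<size_t>(adjusted)];
}

int main()
{
    std::vector<std::vector<int>> arr{{1, 2}, {3, 4}, {5}};
    assert((arrayElementSketch(arr, 1) == std::vector<int>{1, 2}));
    assert((arrayElementSketch(arr, -1) == std::vector<int>{5}));
    assert(arrayElementSketch(arr, 4).empty()); /// out of range
    assert(arrayElementSketch(arr, 0).empty()); /// index 0 is never valid
    return 0;
}
```

The same adjustment appears in the `ArrayElementArrayNumImpl::vector` and `ArrayElementArrayStringImpl::vector` bodies further down, where an out-of-range index simply skips copying and leaves an empty nested array in the result.
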
@@ -123,6 +162,8 @@ private: static void executeMatchConstKeyToIndex( size_t num_rows, size_t num_values, PaddedPODArray & matched_idxs, const Matcher & matcher); + + ContextPtr context; }; @@ -181,9 +222,11 @@ struct ArrayElementNumImpl */ template static void vectorConst( - const PaddedPODArray & data, const ColumnArray::Offsets & offsets, + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, const ColumnArray::Offset index, - PaddedPODArray & result, ArrayImpl::NullMapBuilder & builder) + PaddedPODArray & result, + ArrayImpl::NullMapBuilder & builder) { size_t size = offsets.size(); result.resize(size); @@ -220,9 +263,11 @@ struct ArrayElementNumImpl */ template static void vector( - const PaddedPODArray & data, const ColumnArray::Offsets & offsets, + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, const PaddedPODArray & indices, - PaddedPODArray & result, ArrayImpl::NullMapBuilder & builder) + PaddedPODArray & result, + ArrayImpl::NullMapBuilder & builder) { size_t size = offsets.size(); result.resize(size); @@ -262,6 +307,354 @@ struct ArrayElementNumImpl } }; + +template +struct ArrayElementArrayNumImpl +{ + /** Implementation for constant index. + * If negative = false - index is from beginning of array, started from 0. + * If negative = true - index is from end of array, started from 0. + * nullable_number is true when process input with type Array(Array(Nullable(T))) + */ + template + static void vectorConst( + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const NullMap * number_null_map, + const ColumnArray::Offset index, + PaddedPODArray & result_data, + ColumnArray::Offsets & result_offsets, + NullMap * result_number_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate the size of result_data or result_number_null_map + size_t result_data_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + if (index < array_size) + { + size_t j = !negative ? (offsets[i - 1] + index) : (offsets[i] - index - 1); + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j-1]; + result_data_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + if constexpr (nullable_number) + result_number_null_map->resize(result_data_size); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + if (index < array_size) + { + size_t j = !negative ? (offsets[i - 1] + index) : (offsets[i] - index - 1); + if (builder) + builder.update(j); + + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j-1]; + ColumnArray::Offset nested_array_pos = nested_offsets[j-1]; + memcpy(&result_data[current_offset], &data[nested_array_pos], nested_array_size * sizeof(T)); + if constexpr (nullable_number) + memcpy(&(*result_number_null_map)[current_offset], &(*number_null_map)[nested_array_pos], nested_array_size); + + current_offset += nested_array_size; + } + else + { + /// Empty Array(T), no need to copy anything + if (builder) + builder.update(); + } + + result_offsets[i] = current_offset; + } + } + + /** Implementation for non-constant index. 
+ */ + template + static void vector( + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const NullMap * number_null_map, + const PaddedPODArray & indices, + PaddedPODArray & result_data, + ColumnArray::Offsets & result_offsets, + NullMap * result_number_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate the size of result_data or result_number_null_map + size_t result_data_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + { + size_t j = offsets[i - 1] + index - 1; + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j - 1]; + result_data_size += nested_array_size; + } + else if (index < 0 && -static_cast(index) <= array_size) + { + size_t j = offsets[i] + index; + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j - 1]; + result_data_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + if constexpr (nullable_number) + result_number_null_map->resize(result_data_size); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i - 1]; + + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + { + size_t j = offsets[i - 1] + index - 1; + if (builder) + builder.update(j); + + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j-1]; + ColumnArray::Offset nested_array_pos = nested_offsets[j-1]; + memcpy(&result_data[current_offset], &data[nested_array_pos], nested_array_size * sizeof(T)); + if constexpr (nullable_number) + memcpy(&(*result_number_null_map)[current_offset], &(*number_null_map)[nested_array_pos], nested_array_size); + + current_offset += nested_array_size; + } + else if (index < 0 && -static_cast(index) <= array_size) + { + size_t j = offsets[i] + index; + if (builder) + builder.update(j); + + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j - 1]; + ColumnArray::Offset nested_array_pos = nested_offsets[j-1]; + memcpy(&result_data[current_offset], &data[nested_array_pos], nested_array_size * sizeof(T)); + if constexpr (nullable_number) + memcpy(&(*result_number_null_map)[current_offset], &(*number_null_map)[nested_array_pos], nested_array_size); + + current_offset += nested_array_size; + } + else + { + /// Empty Array(T), no need to copy anything + if (builder) + builder.update(); + } + + result_offsets[i] = current_offset; + } + } +}; + +struct ArrayElementArrayStringImpl +{ + /// nullable_string is true when process input with type Array(Array(Nullable(String))) + template + static void vectorConst( + const ColumnString::Chars & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const ColumnString::Offsets & string_offsets, + const NullMap * string_null_map, + const ColumnArray::Offset index, + ColumnString::Chars & result_data, + ColumnArray::Offsets & result_offsets, + ColumnArray::Offsets & result_string_offsets, + NullMap * result_string_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate size of result_data(total count of strings) and result_string_offsets(total size of strings) + size_t result_data_size = 0; + size_t 
result_strings_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + if (index < array_size) + { + size_t adjusted_index = !negative ? index : (array_size - index - 1); + size_t j = offsets[i - 1] + adjusted_index; + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + result_data_size += string_offsets[nested_array_start + nested_array_size - 1] - string_offsets[nested_array_start - 1]; + result_strings_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + result_string_offsets.reserve(result_strings_size); + if constexpr (nullable_string) + result_string_null_map->reserve(result_strings_size); + + ColumnArray::Offset current_offset = 0; + ColumnArray::Offset current_string_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + + if (index < array_size) + { + size_t adjusted_index = !negative ? index : (array_size - index - 1); + + size_t j = offsets[i - 1] + adjusted_index; + if (builder) + builder.update(j); + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + /// For each String in Array(String), append it to result_data and update result_offsets and result_string_offsets + for (size_t k = 0; k < nested_array_size; ++k) + { + auto string_start = string_offsets[nested_array_start + k - 1]; + auto string_size = string_offsets[nested_array_start + k] - string_start; + memcpySmallAllowReadWriteOverflow15(&result_data[current_string_offset], &data[string_start], string_size); + current_string_offset += string_size; + result_string_offsets.push_back(current_string_offset); + + if constexpr (nullable_string) + result_string_null_map->push_back((*string_null_map)[nested_array_start + k]); + } + current_offset += nested_array_size; + } + else + { + /// Insert empty Array(String) or Array(Nullable(String)), no need to copy anything + if (builder) + builder.update(); + } + result_offsets[i] = current_offset; + } + } + + /** Implementation for non-constant index. 
+ */ + template + static void vector( + const ColumnString::Chars & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const ColumnString::Offsets & string_offsets, + const NullMap * string_null_map, + const PaddedPODArray & indices, + ColumnString::Chars & result_data, + ColumnArray::Offsets & result_offsets, + ColumnArray::Offsets & result_string_offsets, + NullMap * result_string_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate size of result_data(total count of strings) and result_string_offsets(total size of strings) + size_t result_data_size = 0; + size_t result_strings_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + size_t adjusted_index; /// index in array from zero + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + adjusted_index = index - 1; + else if (index < 0 && -static_cast(index) <= array_size) + adjusted_index = array_size + index; + else + adjusted_index = array_size; /// means no element should be taken + + if (adjusted_index < array_size) + { + size_t j = offsets[i - 1] + adjusted_index; + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + result_data_size += string_offsets[nested_array_start + nested_array_size - 1] - string_offsets[nested_array_start - 1]; + result_strings_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + result_string_offsets.reserve(result_strings_size); + if constexpr (nullable_string) + result_string_null_map->reserve(result_strings_size); + + ColumnArray::Offset current_offset = 0; + ColumnArray::Offset current_string_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + size_t adjusted_index; /// index in array from zero + + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + adjusted_index = index - 1; + else if (index < 0 && -static_cast(index) <= array_size) + adjusted_index = array_size + index; + else + adjusted_index = array_size; /// means no element should be taken + + + if (adjusted_index < array_size) + { + size_t j = offsets[i - 1] + adjusted_index; + if (builder) + builder.update(j); + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + /// For each String in Array(String), append it to result_data and update result_offsets and result_string_offsets + for (size_t k = 0; k < nested_array_size; ++k) + { + auto string_start = string_offsets[nested_array_start + k - 1]; + auto string_size = string_offsets[nested_array_start + k] - string_start; + memcpySmallAllowReadWriteOverflow15(&result_data[current_string_offset], &data[string_start], string_size); + current_string_offset += string_size; + result_string_offsets.push_back(current_string_offset); + + if constexpr (nullable_string) + result_string_null_map->push_back((*string_null_map)[nested_array_start + k]); + } + current_offset += nested_array_size; + } + else + { + /// Insert empty Array(String), no need to copy anything + if (builder) + builder.update(); + } + result_offsets[i] = current_offset; + } + } +}; + + struct ArrayElementStringImpl { template @@ -495,25 +888,28 @@ FunctionPtr FunctionArrayElement::create(ContextPtr) template ColumnPtr 
FunctionArrayElement::executeNumberConst( - const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder) + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const Field & index, ArrayImpl::NullMapBuilder & builder) { - const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + using ColVecType = ColumnVectorOrDecimal; + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); if (!col_array) return nullptr; - const ColumnVector * col_nested = checkAndGetColumn>(&col_array->getData()); - + const ColVecType * col_nested = checkAndGetColumn(&col_array->getData()); if (!col_nested) return nullptr; - auto col_res = ColumnVector::create(); + auto col_res = result_type->createColumn(); + ColVecType * col_res_vec = typeid_cast(col_res.get()); + if (!col_res_vec) + return nullptr; if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.get() >= 0)) { ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), index.get() - 1, col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), index.get() - 1, col_res_vec->getData(), builder); } else if (index.getType() == Field::Types::Int64) { @@ -525,9 +921,8 @@ ColumnPtr FunctionArrayElement::executeNumberConst( /// Negative array indices work this way: /// arr[-1] is the element at offset 0 from the last /// arr[-2] is the element at offset 1 from the last and so on. - ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), -(static_cast(index.safeGet()) + 1), col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), -(static_cast(index.safeGet()) + 1), col_res_vec->getData(), builder); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); @@ -537,22 +932,28 @@ ColumnPtr FunctionArrayElement::executeNumberConst( template ColumnPtr FunctionArrayElement::executeNumber( - const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const PaddedPODArray & indices, + ArrayImpl::NullMapBuilder & builder) { - const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + using ColVecType = ColumnVectorOrDecimal; + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); if (!col_array) return nullptr; - const ColumnVector * col_nested = checkAndGetColumn>(&col_array->getData()); - + const ColVecType * col_nested = checkAndGetColumn(&col_array->getData()); if (!col_nested) return nullptr; - auto col_res = ColumnVector::create(); + auto col_res = result_type->createColumn(); + ColVecType * col_res_vec = typeid_cast(col_res.get()); + if (!col_res_vec) + return nullptr; ArrayElementNumImpl::template vector( - col_nested->getData(), col_array->getOffsets(), indices, col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), indices, col_res_vec->getData(), builder); return col_res; } @@ -621,6 +1022,268 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument return col_res; } +ColumnPtr FunctionArrayElement::executeArrayStringConst( + const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder) +{ + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const 
ColumnArray * col_nested_array = checkAndGetColumn(&col_array->getData()); + if (!col_nested_array) + return nullptr; + + const ColumnString * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(col_nested_array->getDataPtr().get()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + const auto * string_null_map = col_nullable ? &col_nullable->getNullMapColumn() : nullptr; + auto res_string = ColumnString::create(); + auto res_offsets = ColumnArray::ColumnOffsets::create(); + auto res_string_null_map = col_nullable ? ColumnUInt8::create() : nullptr; + if (index.getType() == Field::Types::UInt64 + || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + { + if (col_nullable) + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + &string_null_map->getData(), + index.get() - 1, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + &res_string_null_map->getData(), + builder); + else + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + nullptr, + index.get() - 1, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + nullptr, + builder); + } + else if (index.getType() == Field::Types::Int64) + { + if (col_nullable) + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + &string_null_map->getData(), + -(UInt64(index.get()) + 1), + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + &res_string_null_map->getData(), + builder); + else + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + nullptr, + -(UInt64(index.get()) + 1), + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + nullptr, + builder); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); + + if (col_nullable) + return ColumnArray::create(ColumnNullable::create(std::move(res_string), std::move(res_string_null_map)), std::move(res_offsets)); + else + return ColumnArray::create(std::move(res_string), std::move(res_offsets)); +} + +template +ColumnPtr FunctionArrayElement::executeArrayNumberConst( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const Field & index, ArrayImpl::NullMapBuilder & builder) +{ + using ColVecType = ColumnVectorOrDecimal; + + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnArray * col_nested_array = checkAndGetColumn(col_array->getDataPtr().get()); + if (!col_nested_array) + return nullptr; + + const ColVecType * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(&col_nested_array->getData()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + auto res = result_type->createColumn(); + 
ColumnArray * res_array = typeid_cast(res.get()); + if (!res_array) + return nullptr; + + ColVecType * res_data = nullptr; + ColumnNullable * res_nullable = typeid_cast(&res_array->getData()); + if (!res_nullable) + res_data = typeid_cast(&res_array->getData()); + else + res_data = typeid_cast(&res_nullable->getNestedColumn()); + + const NullMap * null_map = col_nullable ? &col_nullable->getNullMapData() : nullptr; + auto & res_offsets = res_array->getOffsets(); + NullMap * res_null_map = res_nullable ? &res_nullable->getNullMapData() : nullptr; + + if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + { + if (col_nullable) + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + index.get() - 1, + res_data->getData(), + res_offsets, + res_null_map, + builder); + else + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + index.get() - 1, + res_data->getData(), + res_offsets, + res_null_map, + builder); + } + else if (index.getType() == Field::Types::Int64) + { + /// Cast to UInt64 before negation allows to avoid undefined behaviour for negation of the most negative number. + /// NOTE: this would be undefined behaviour in C++ sense, but nevertheless, compiler cannot see it on user provided data, + /// and generates the code that we want on supported CPU architectures (overflow in sense of two's complement arithmetic). + /// This is only needed to avoid UBSan report. + + /// Negative array indices work this way: + /// arr[-1] is the element at offset 0 from the last + /// arr[-2] is the element at offset 1 from the last and so on. 
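Note on the indexing convention used by the new Array(Array(T)) code paths: indices are 1-based, negative indices count from the end as described in the comment above, and an out-of-range index produces an empty inner array rather than an error. A minimal SQL-level sketch of the expected behaviour (illustrative queries written against this patch, not captured server output):

``` sql
-- positive indices are 1-based
SELECT arrayElement([[1, 2], [3, 4], [5]], 2);   -- [3,4]
-- negative indices count from the last element: -1 is the last inner array
SELECT arrayElement([[1, 2], [3, 4], [5]], -1);  -- [5]
-- out-of-range indices yield an empty Array(T) instead of throwing
SELECT arrayElement([[1, 2], [3, 4], [5]], 10);  -- []
```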
+ if (col_nullable) + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + -(static_cast(index.safeGet()) + 1), + res_data->getData(), + res_offsets, + res_null_map, + builder); + else + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + -(static_cast(index.safeGet()) + 1), + res_data->getData(), + res_offsets, + res_null_map, + builder); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); + + return res; +} + +template +ColumnPtr FunctionArrayElement::executeArrayNumber( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) +{ + using ColVecType = ColumnVectorOrDecimal; + + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnArray * col_nested_array = checkAndGetColumn(col_array->getDataPtr().get()); + if (!col_nested_array) + return nullptr; + + const ColVecType * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(&col_nested_array->getData()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + MutableColumnPtr res = result_type->createColumn(); + ColumnArray * res_array = typeid_cast(res.get()); + if (!res_array) + return nullptr; + + ColVecType * res_data = nullptr; + ColumnNullable * res_nullable = typeid_cast(&res_array->getData()); + if (!res_nullable) + res_data = typeid_cast(&res_array->getData()); + else + res_data = typeid_cast(&res_nullable->getNestedColumn()); + + const NullMap * null_map = col_nullable ? &col_nullable->getNullMapData() : nullptr; + auto & res_offsets = res_array->getOffsets(); + NullMap * res_null_map = res_nullable ? 
&res_nullable->getNullMapData() : nullptr; + + if (col_nullable) + ArrayElementArrayNumImpl::template vector( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + indices, + res_data->getData(), + res_offsets, + res_null_map, + builder); + else + ArrayElementArrayNumImpl::template vector( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + indices, + res_data->getData(), + res_offsets, + res_null_map, + builder); + return res; +} + template ColumnPtr FunctionArrayElement::executeString( const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) @@ -659,6 +1322,66 @@ ColumnPtr FunctionArrayElement::executeString( return col_res; } +template +ColumnPtr FunctionArrayElement::executeArrayString( + const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) +{ + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnArray * col_nested_array = checkAndGetColumn(&col_array->getData()); + if (!col_nested_array) + return nullptr; + + const ColumnString * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(&col_nested_array->getData()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + const auto * string_null_map = col_nullable ? &col_nullable->getNullMapColumn() : nullptr; + auto res_string = ColumnString::create(); + auto res_offsets = ColumnArray::ColumnOffsets::create(); + auto res_string_null_map = col_nullable ? 
ColumnUInt8::create() : nullptr; + + if (col_nullable) + ArrayElementArrayStringImpl::vector( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + &string_null_map->getData(), + indices, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + &res_string_null_map->getData(), + builder); + else + ArrayElementArrayStringImpl::vector( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + nullptr, + indices, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + nullptr, + builder); + + if (col_nullable) + return ColumnArray::create(ColumnNullable::create(std::move(res_string), std::move(res_string_null_map)), std::move(res_offsets)); + else + return ColumnArray::create(std::move(res_string), std::move(res_offsets)); +} + ColumnPtr FunctionArrayElement::executeGenericConst( const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder) { @@ -758,34 +1481,119 @@ ColumnPtr FunctionArrayElement::executeArgument( builder.initSink(index_data.size()); ColumnPtr res; - if (!((res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeConst(arguments, result_type, index_data, builder, input_rows_count)) - || (res = executeString(arguments, index_data, builder)) - || (res = executeGeneric(arguments, index_data, builder)))) + if (!((res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeConst(arguments, result_type, index_data, builder, input_rows_count)) + || (res = executeString(arguments, index_data, builder)) + || (res = 
executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayString(arguments, index_data, builder)) + || (res = executeGeneric(arguments, index_data, builder)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); return res; } +ColumnPtr FunctionArrayElement::executeMap2(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const +{ + const ColumnArray * col_array = typeid_cast(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnMap * col_map = typeid_cast(&col_array->getData()); + if (!col_map) + return nullptr; + + const ColumnArray * col_map_nested = &col_map->getNestedColumn(); + const ColumnTuple * col_map_kv = checkAndGetColumn(col_map_nested->getDataPtr().get()); + ColumnPtr col_map_keys = col_map_kv->getColumnPtr(0); + ColumnPtr col_map_values = col_map_kv->getColumnPtr(1); + + const DataTypeMap & map_type + = typeid_cast(*typeid_cast(*arguments[0].type).getNestedType()); + const auto & key_type = map_type.getKeyType(); + const auto & value_type = map_type.getValueType(); + + ColumnsWithTypeAndName temporary_results(2); + temporary_results[1] = arguments[1]; + + ColumnPtr result_key_column; + /// Calculate the function for the keys of the map. 
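The approach in executeMap2 is: split the Array(Map(K, V)) argument into two temporary columns of type Array(Array(K)) and Array(Array(V)), run arrayElement on each, and zip the two results back into a Map column. A hedged SQL-level sketch of what this enables (illustrative queries, not taken from the test suite):

``` sql
-- indexing into an array of maps returns the selected map
SELECT arrayElement([map('a', 1), map('b', 2)], 2);  -- {'b':2}
-- an out-of-range index yields an empty map
SELECT arrayElement([map('a', 1), map('b', 2)], 5);  -- {}
```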
+ { + ColumnWithTypeAndName array_of_keys; + array_of_keys.column + = ColumnArray::create(ColumnArray::create(col_map_keys, col_map_nested->getOffsetsPtr()), col_array->getOffsetsPtr()); + array_of_keys.type = std::make_shared(std::make_shared(key_type)); + temporary_results[0] = std::move(array_of_keys); + + auto type = getReturnTypeImpl({temporary_results[0].type, temporary_results[1].type}); + auto col = executeImpl(temporary_results, type, input_rows_count); + result_key_column = std::move(col); + } + + /// Calculate the function for the values of the map + ColumnPtr result_value_column; + { + ColumnWithTypeAndName array_of_values; + array_of_values.column + = ColumnArray::create(ColumnArray::create(col_map_values, col_map_nested->getOffsetsPtr()), col_array->getOffsetsPtr()); + array_of_values.type = std::make_shared(std::make_shared(value_type)); + temporary_results[0] = std::move(array_of_values); + + auto type = getReturnTypeImpl({temporary_results[0].type, temporary_results[1].type}); + auto col = executeImpl(temporary_results, type, input_rows_count); + result_value_column = std::move(col); + } + + const auto & data_keys = typeid_cast(*result_key_column).getDataPtr(); + const auto & data_values = typeid_cast(*result_value_column).getDataPtr(); + const auto & offsets = typeid_cast(*result_key_column).getOffsetsPtr(); + auto result_nested_column = ColumnArray::create(ColumnTuple::create(Columns{data_keys, data_values}), offsets); + return ColumnMap::create(std::move(result_nested_column)); +} + ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const ColumnArray * col_array = typeid_cast(arguments[0].column.get()); - if (!col_array) return nullptr; const ColumnTuple * col_nested = typeid_cast(&col_array->getData()); - if (!col_nested) return nullptr; @@ -1275,6 +2083,8 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments ColumnPtr res; if ((res = executeTuple(arguments, input_rows_count))) return res; + else if ((res = executeMap2(arguments, input_rows_count))) + return res; else if (!isColumnConst(*arguments[1].column)) { if (!((res = executeArgument(arguments, result_type, builder, input_rows_count)) @@ -1300,20 +2110,52 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments if (index == 0u) throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Array indices are 1-based"); - if (!((res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeStringConst (arguments, index, builder)) - || (res = executeGenericConst (arguments, index, builder)))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); + if (!((res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = 
executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeStringConst(arguments, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayStringConst(arguments, index, builder)) + || (res = executeGenericConst(arguments, index, builder)))) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), + getName()); } return res; diff --git a/src/Functions/checkHyperscanRegexp.h b/src/Functions/checkHyperscanRegexp.h index a4b2b615049..a11a8d5ec48 100644 --- a/src/Functions/checkHyperscanRegexp.h +++ b/src/Functions/checkHyperscanRegexp.h @@ -1,17 +1,9 @@ #pragma once +#include #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp new file mode 100644 index 00000000000..a73347400c6 --- 
/dev/null +++ b/src/Functions/idna.cpp @@ -0,0 +1,202 @@ +#include "config.h" + +#if USE_IDNA + +#include +#include +#include + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wnewline-eof" +#endif +# include +# include +# include +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; +} + +/// Implementation of +/// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode +/// and [3] https://www.unicode.org/reports/tr46/#ToUnicode + +enum class ErrorHandling +{ + Throw, /// Throw exception + Empty /// Return empty string +}; + + +/// Translates a UTF-8 string (typically an Internationalized Domain Name for Applications, IDNA) to an ASCII-encoded equivalent. The +/// encoding is performed per domain component and based on Punycode with ASCII Compatible Encoding (ACE) prefix "xn--". +/// Example: "straße.münchen.de" --> "xn--strae-oqa.xn--mnchen-3ya.de" +/// Note: doesn't do percent decoding. Doesn't trim tabs, spaces or control characters. Expects non-empty inputs. +template +struct IdnaEncode +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); + + size_t prev_offset = 0; + std::string ascii; + for (size_t row = 0; row < rows; ++row) + { + const char * value = reinterpret_cast(&data[prev_offset]); + const size_t value_length = offsets[row] - prev_offset - 1; + + std::string_view value_view(value, value_length); + if (!value_view.empty()) /// to_ascii() expects non-empty input + { + ascii = ada::idna::to_ascii(value_view); + const bool ok = !ascii.empty(); + if (!ok) + { + if constexpr (error_handling == ErrorHandling::Throw) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view); + } + else + { + static_assert(error_handling == ErrorHandling::Empty); + ascii.clear(); + } + } + } + + res_data.insert(ascii.c_str(), ascii.c_str() + ascii.size() + 1); + res_offsets.push_back(res_data.size()); + + prev_offset = offsets[row]; + + ascii.clear(); + } + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); + } +}; + +/// Translates an ASII-encoded IDNA string back to its UTF-8 representation. +struct IdnaDecode +{ + /// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling. 
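Together with IdnaEncode above, the intended round trip at the SQL level looks as follows; the sample values mirror the examples registered further down in this file, and tryIdnaEncode is the variant that returns an empty string instead of throwing when the conversion fails:

``` sql
SELECT idnaEncode('straße.münchen.de');                -- 'xn--strae-oqa.xn--mnchen-3ya.de'
SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de');  -- 'straße.münchen.de'
SELECT tryIdnaEncode('straße.münchen.de');             -- same as idnaEncode, but empty string on error
```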
+ static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); + + size_t prev_offset = 0; + std::string unicode; + for (size_t row = 0; row < rows; ++row) + { + const char * ascii = reinterpret_cast(&data[prev_offset]); + const size_t ascii_length = offsets[row] - prev_offset - 1; + std::string_view ascii_view(ascii, ascii_length); + + unicode = ada::idna::to_unicode(ascii_view); + + res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1); + res_offsets.push_back(res_data.size()); + + prev_offset = offsets[row]; + + unicode.clear(); + } + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); + } +}; + +struct NameIdnaEncode { static constexpr auto name = "idnaEncode"; }; +struct NameTryIdnaEncode { static constexpr auto name = "tryIdnaEncode"; }; +struct NameIdnaDecode { static constexpr auto name = "idnaDecode"; }; + +using FunctionIdnaEncode = FunctionStringToString, NameIdnaEncode>; +using FunctionTryIdnaEncode = FunctionStringToString, NameTryIdnaEncode>; +using FunctionIdnaDecode = FunctionStringToString; + +REGISTER_FUNCTION(Idna) +{ + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)", + .syntax="idnaEncode(str)", + .arguments={{"str", "Input string"}}, + .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT idnaEncode('straße.münchen.de') AS ascii;", + R"( +┌─ascii───────────────────────────┐ +│ xn--strae-oqa.xn--mnchen-3ya.de │ +└─────────────────────────────────┘ + )" + }} + }); + + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Computes a ASCII representation of an Internationalized Domain Name. 
Returns an empty string in case of error)", + .syntax="punycodeEncode(str)", + .arguments={{"str", "Input string"}}, + .returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT idnaEncodeOrNull('München') AS ascii;", + R"( +┌─ascii───────────────────────────┐ +│ xn--strae-oqa.xn--mnchen-3ya.de │ +└─────────────────────────────────┘ + )" + }} + }); + + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Computes the Unicode representation of ASCII-encoded Internationalized Domain Name.)", + .syntax="idnaDecode(str)", + .arguments={{"str", "Input string"}}, + .returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT idnaDecode('xn--strae-oqa.xn--mnchen-3ya.de') AS unicode;", + R"( +┌─unicode───────────┐ +│ straße.münchen.de │ +└───────────────────┘ + )" + }} + }); +} + +} + +#endif + diff --git a/src/Functions/keyvaluepair/impl/NeedleFactory.h b/src/Functions/keyvaluepair/impl/NeedleFactory.h index b5f0645abcc..83862a2281a 100644 --- a/src/Functions/keyvaluepair/impl/NeedleFactory.h +++ b/src/Functions/keyvaluepair/impl/NeedleFactory.h @@ -3,6 +3,7 @@ #include #include +#include #include namespace DB diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 1381e9f2828..987cf4eb1a9 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -434,7 +434,7 @@ public: }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isNumber, isColumnConst, "const Number"}, + {optional_argument_names[0], &isNumber, nullptr, "const Number"}, {optional_argument_names[1], &isNumber, isColumnConst, "const Number"}, {optional_argument_names[2], &isString, isColumnConst, "const String"} }; diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index cdb9ca061c3..d0f5a1ce439 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -143,7 +143,6 @@ public: * depending on values of conditions. */ - std::vector instructions; instructions.reserve(arguments.size() / 2 + 1); @@ -238,7 +237,7 @@ public: } const auto & settings = context->getSettingsRef(); - const WhichDataType which(result_type); + const WhichDataType which(removeNullable(result_type)); bool execute_multiif_columnar = settings.allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat()); @@ -254,8 +253,12 @@ public: if (which.is##TYPE()) \ { \ MutableColumnPtr res = ColumnVector::create(rows); \ - executeInstructionsColumnar(instructions, rows, res); \ - return std::move(res); \ + MutableColumnPtr null_map = result_type->isNullable() ? 
ColumnUInt8::create(rows) : nullptr; \ + executeInstructionsColumnar(instructions, rows, res, null_map, result_type->isNullable()); \ + if (!result_type->isNullable()) \ + return std::move(res); \ + else \ + return ColumnNullable::create(std::move(res), std::move(null_map)); \ } #define ENUMERATE_NUMERIC_TYPES(M, INDEX) \ @@ -295,6 +298,7 @@ public: } private: + static void executeInstructions(std::vector & instructions, size_t rows, const MutableColumnPtr & res) { for (size_t i = 0; i < rows; ++i) @@ -374,17 +378,59 @@ private: } template - static void executeInstructionsColumnar(std::vector & instructions, size_t rows, const MutableColumnPtr & res) + static void executeInstructionsColumnar(std::vector & instructions, size_t rows, const MutableColumnPtr & res, const MutableColumnPtr & null_map, bool nullable) { PaddedPODArray inserts(rows, static_cast(instructions.size())); calculateInserts(instructions, rows, inserts); PaddedPODArray & res_data = assert_cast &>(*res).getData(); - for (size_t row_i = 0; row_i < rows; ++row_i) + if (!nullable) { - auto & instruction = instructions[inserts[row_i]]; - auto ref = instruction.source->getDataAt(row_i); - res_data[row_i] = *reinterpret_cast(ref.data); + for (size_t row_i = 0; row_i < rows; ++row_i) + { + auto & instruction = instructions[inserts[row_i]]; + auto ref = instruction.source->getDataAt(row_i); + res_data[row_i] = *reinterpret_cast(ref.data); + } + } + else + { + PaddedPODArray & null_map_data = assert_cast(*null_map).getData(); + std::vector data_cols(instructions.size()); + std::vector null_map_cols(instructions.size()); + ColumnPtr shared_null_map_col = nullptr; + for (size_t i = 0; i < instructions.size(); ++i) + { + if (instructions[i].source->isNullable()) + { + const ColumnNullable * nullable_col; + if (!instructions[i].source_is_constant) + nullable_col = assert_cast(instructions[i].source.get()); + else + { + const ColumnPtr data_column = assert_cast(*instructions[i].source).getDataColumnPtr(); + nullable_col = assert_cast(data_column.get()); + } + null_map_cols[i] = assert_cast(*nullable_col->getNullMapColumnPtr()).getData().data(); + data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); + } + else + { + if (!shared_null_map_col) + { + shared_null_map_col = ColumnUInt8::create(rows, 0); + } + null_map_cols[i] = assert_cast(*shared_null_map_col).getData().data(); + data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); + } + } + for (size_t row_i = 0; row_i < rows; ++row_i) + { + auto & instruction = instructions[inserts[row_i]]; + size_t index = instruction.source_is_constant ? 
0 : row_i; + res_data[row_i] = *(data_cols[inserts[row_i]] + index); + null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index); + } } } diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp new file mode 100644 index 00000000000..159189744bd --- /dev/null +++ b/src/Functions/punycode.cpp @@ -0,0 +1,206 @@ +#include "config.h" + +#if USE_IDNA + +#include +#include +#include + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wnewline-eof" +#endif +# include +# include +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; +} + +/// Implementation of +/// - punycodeEncode(), punycodeDecode() and tryPunycodeDecode(), see https://en.wikipedia.org/wiki/Punycode + +enum class ErrorHandling +{ + Throw, /// Throw exception + Empty /// Return empty string +}; + + +struct PunycodeEncode +{ + /// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out. + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); + + size_t prev_offset = 0; + std::u32string value_utf32; + std::string value_puny; + for (size_t row = 0; row < rows; ++row) + { + const char * value = reinterpret_cast(&data[prev_offset]); + const size_t value_length = offsets[row] - prev_offset - 1; + + const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length); + value_utf32.resize(value_utf32_length); + const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data()); + if (codepoints == 0) + value_utf32.clear(); /// input was empty or no valid UTF-8 + + const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny); + if (!ok) + value_puny.clear(); + + res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1); + res_offsets.push_back(res_data.size()); + + prev_offset = offsets[row]; + + value_utf32.clear(); + value_puny.clear(); /// utf32_to_punycode() appends to its output string + } + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); + } +}; + + +template +struct PunycodeDecode +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); + + size_t prev_offset = 0; + std::u32string value_utf32; + std::string value_utf8; + for (size_t row = 0; row < rows; ++row) + { + const char * value = reinterpret_cast(&data[prev_offset]); + const size_t value_length = offsets[row] - prev_offset - 1; + + const std::string_view value_punycode(value, value_length); + const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32); + if (!ok) + { + if constexpr (error_handling == ErrorHandling::Throw) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' is not a valid Punycode-encoded string", value_punycode); + } + else + { + 
static_assert(error_handling == ErrorHandling::Empty); + value_utf32.clear(); + } + } + + const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size()); + value_utf8.resize(utf8_length); + ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data()); + + res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1); + res_offsets.push_back(res_data.size()); + + prev_offset = offsets[row]; + + value_utf32.clear(); /// punycode_to_utf32() appends to its output string + value_utf8.clear(); + } + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); + } +}; + +struct NamePunycodeEncode { static constexpr auto name = "punycodeEncode"; }; +struct NamePunycodeDecode { static constexpr auto name = "punycodeDecode"; }; +struct NameTryPunycodeDecode { static constexpr auto name = "tryPunycodeDecode"; }; + +using FunctionPunycodeEncode = FunctionStringToString; +using FunctionPunycodeDecode = FunctionStringToString, NamePunycodeDecode>; +using FunctionTryPunycodeDecode = FunctionStringToString, NameTryPunycodeDecode>; + +REGISTER_FUNCTION(Punycode) +{ + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Computes a Punycode representation of a string.)", + .syntax="punycodeEncode(str)", + .arguments={{"str", "Input string"}}, + .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT punycodeEncode('München') AS puny;", + R"( +┌─puny───────┐ +│ Mnchen-3ya │ +└────────────┘ + )" + }} + }); + + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Computes a Punycode representation of a string. Throws an exception if the input is not valid Punycode.)", + .syntax="punycodeDecode(str)", + .arguments={{"str", "A Punycode-encoded string"}}, + .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT punycodeDecode('Mnchen-3ya') AS plain;", + R"( +┌─plain───┐ +│ München │ +└─────────┘ + )" + }} + }); + + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Computes a Punycode representation of a string. 
Returns an empty string if the input is not valid Punycode.)", + .syntax="punycodeDecode(str)", + .arguments={{"str", "A Punycode-encoded string"}}, + .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT tryPunycodeDecode('Mnchen-3ya') AS plain;", + R"( +┌─plain───┐ +│ München │ +└─────────┘ + )" + }} + }); +} + +} + +#endif diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp new file mode 100644 index 00000000000..21e36761213 --- /dev/null +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -0,0 +1,238 @@ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wshadow" +#pragma clang diagnostic ignored "-Wimplicit-float-conversion" +#endif + +#include + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int ILLEGAL_COLUMN; +} + +// Decompose time series data based on STL(Seasonal-Trend Decomposition Procedure Based on Loess) +class FunctionSeriesDecomposeSTL : public IFunction +{ +public: + static constexpr auto name = "seriesDecomposeSTL"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"time_series", &isArray, nullptr, "Array"}, + {"period", &isNativeUInt, nullptr, "Unsigned Integer"}, + }; + validateFunctionArgumentTypes(*this, arguments, args); + + return std::make_shared(std::make_shared(std::make_shared())); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + { + ColumnPtr array_ptr = arguments[0].column; + const ColumnArray * array = checkAndGetColumn(array_ptr.get()); + if (!array) + { + const ColumnConst * const_array = checkAndGetColumnConst(arguments[0].column.get()); + if (!const_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); + + array_ptr = const_array->convertToFullColumn(); + array = assert_cast(array_ptr.get()); + } + + const IColumn & src_data = array->getData(); + const ColumnArray::Offsets & src_offsets = array->getOffsets(); + + auto res = ColumnFloat32::create(); + auto & res_data = res->getData(); + + ColumnArray::ColumnOffsets::MutablePtr res_col_offsets = ColumnArray::ColumnOffsets::create(); + auto & res_col_offsets_data = res_col_offsets->getData(); + + auto root_offsets = ColumnArray::ColumnOffsets::create(); + auto & root_offsets_data = root_offsets->getData(); + + ColumnArray::Offset prev_src_offset = 0; + + for (size_t i = 0; i < src_offsets.size(); ++i) + { + UInt64 period; + auto period_ptr = arguments[1].column->convertToFullColumnIfConst(); + if (checkAndGetColumn(period_ptr.get()) + || checkAndGetColumn(period_ptr.get()) + || checkAndGetColumn(period_ptr.get()) + || checkAndGetColumn(period_ptr.get())) + period = 
period_ptr->getUInt(i); + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of second argument of function {}", + arguments[1].column->getName(), + getName()); + + + std::vector seasonal; + std::vector trend; + std::vector residue; + + ColumnArray::Offset curr_offset = src_offsets[i]; + + if (executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue) + || executeNumber(src_data, period, prev_src_offset, curr_offset, seasonal, trend, residue)) + { + res_data.insert(seasonal.begin(), seasonal.end()); + res_col_offsets_data.push_back(res_data.size()); + + res_data.insert(trend.begin(), trend.end()); + res_col_offsets_data.push_back(res_data.size()); + + res_data.insert(residue.begin(), residue.end()); + res_col_offsets_data.push_back(res_data.size()); + + root_offsets_data.push_back(res_col_offsets->size()); + + prev_src_offset = curr_offset; + } + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), + getName()); + } + ColumnArray::MutablePtr nested_array_col = ColumnArray::create(std::move(res), std::move(res_col_offsets)); + return ColumnArray::create(std::move(nested_array_col), std::move(root_offsets)); + } + + template + bool executeNumber( + const IColumn & src_data, + UInt64 period, + ColumnArray::Offset start, + ColumnArray::Offset end, + std::vector & seasonal, + std::vector & trend, + std::vector & residue) const + { + const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data); + if (!src_data_concrete) + return false; + + const PaddedPODArray & src_vec = src_data_concrete->getData(); + + chassert(start <= end); + size_t len = end - start; + if (len < 4) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName()); + if (period > (len / 2)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "The series should have data of at least two period lengths for function {}", getName()); + + std::vector src(src_vec.begin() + start, src_vec.begin() + end); + + auto res = stl::params().fit(src, period); + + if (res.seasonal.empty()) + return false; + + seasonal = std::move(res.seasonal); + trend = std::move(res.trend); + residue = std::move(res.remainder); + return true; + } +}; +REGISTER_FUNCTION(seriesDecomposeSTL) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component. 
+ +**Syntax** + +``` sql +seriesDecomposeSTL(series, period); +``` + +**Arguments** + +- `series` - An array of numeric values +- `period` - A positive number + +The number of data points in `series` should be at least twice the value of `period`. + +**Returned value** + +- An array of three arrays where the first array include seasonal components, the second array - trend, and the third array - residue component. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT seriesDecomposeSTL([10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34], 3) AS print_0; +``` + +Result: + +``` text +┌───────────print_0──────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [[ + -13.529999, -3.1799996, 16.71, -13.53, -3.1799996, 16.71, -13.53, -3.1799996, + 16.71, -13.530001, -3.18, 16.710001, -13.530001, -3.1800003, 16.710001, -13.530001, + -3.1800003, 16.710001, -13.530001, -3.1799994, 16.71, -13.529999, -3.1799994, 16.709997 + ], + [ + 23.63, 23.63, 23.630003, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001, + 23.630001, 23.630001, 23.630001, 23.63, 23.630001, 23.630001, 23.63, 23.630001, + 23.630001, 23.63, 23.630001, 23.630001, 23.630001, 23.630001, 23.630001, 23.630003 + ], + [ + 0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0, + 0 + ]] │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +```)", + .categories{"Time series analysis"}}); +} +} diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 363a3f8ac13..cd3875e2607 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -2,13 +2,16 @@ #if USE_SQIDS +#include #include #include +#include +#include #include #include #include -#include #include +#include #include #include @@ -22,17 +25,17 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -// sqid(number1, ...) -class FunctionSqid : public IFunction +/// sqidEncode(number1, ...) +class FunctionSqidEncode : public IFunction { public: - static constexpr auto name = "sqid"; + static constexpr auto name = "sqidEncode"; String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { @@ -80,21 +83,93 @@ private: sqidscxx::Sqids<> sqids; }; +/// sqidDecode(number1, ...) 
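The class below adds the inverse of sqidEncode (the old name sqid is kept as an alias in the registration at the end of this hunk). Expected usage, mirroring the examples in the registered documentation:

``` sql
SELECT sqidEncode(1, 2, 3, 4, 5);  -- 'gXHfJ1C6dN'
SELECT sqid(1, 2, 3, 4, 5);        -- same result via the backward-compatible alias
SELECT sqidDecode('gXHfJ1C6dN');   -- [1,2,3,4,5]
```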
+class FunctionSqidDecode : public IFunction +{ +public: + static constexpr auto name = "sqidDecode"; + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"sqid", &isString, nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res_nested = ColumnUInt64::create(); + auto & res_nested_data = col_res_nested->getData(); + + auto col_res_offsets = ColumnArray::ColumnOffsets::create(); + auto & res_offsets_data = col_res_offsets->getData(); + res_offsets_data.reserve(input_rows_count); + + const auto & src = arguments[0]; + const auto & src_column = *src.column; + + if (const auto * col_non_const = typeid_cast(&src_column)) + { + for (size_t i = 0; i < input_rows_count; ++i) + { + std::string_view sqid = col_non_const->getDataAt(i).toView(); + std::vector integers = sqids.decode(sqid); + res_nested_data.insert(integers.begin(), integers.end()); + res_offsets_data.push_back(integers.size()); + } + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for function {}", name); + + return ColumnArray::create(std::move(col_res_nested), std::move(col_res_offsets)); + } + +private: + sqidscxx::Sqids<> sqids; +}; + REGISTER_FUNCTION(Sqid) { - factory.registerFunction(FunctionDocumentation{ + factory.registerFunction(FunctionDocumentation{ .description=R"( Transforms numbers into a [Sqid](https://sqids.org/) which is a Youtube-like ID string.)", - .syntax="sqid(number1, ...)", + .syntax="sqidEncode(number1, ...)", .arguments={{"number1, ...", "Arbitrarily many UInt8, UInt16, UInt32 or UInt64 arguments"}}, .returned_value="A hash id [String](/docs/en/sql-reference/data-types/string.md).", .examples={ {"simple", - "SELECT sqid(1, 2, 3, 4, 5);", + "SELECT sqidEncode(1, 2, 3, 4, 5);", R"( -┌─sqid(1, 2, 3, 4, 5)─┐ -│ gXHfJ1C6dN │ -└─────────────────────┘ +┌─sqidEncode(1, 2, 3, 4, 5)─┐ +│ gXHfJ1C6dN │ +└───────────────────────────┘ + )" + }} + }); + factory.registerAlias("sqid", FunctionSqidEncode::name); + + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Transforms a [Sqid](https://sqids.org/) back into an array of numbers.)", + .syntax="sqidDecode(number1, ...)", + .arguments={{"sqid", "A sqid"}}, + .returned_value="An array of [UInt64](/docs/en/sql-reference/data-types/int-uint.md).", + .examples={ + {"simple", + "SELECT sqidDecode('gXHfJ1C6dN');", + R"( +┌─sqidDecode('gXHfJ1C6dN')─┐ +│ [1,2,3,4,5] │ +└──────────────────────────┘ )" }} }); diff --git a/src/Functions/stl.hpp b/src/Functions/stl.hpp new file mode 100644 index 00000000000..fbfc52f959c --- /dev/null +++ b/src/Functions/stl.hpp @@ -0,0 +1,513 @@ +// Dump of https://github.com/ankane/stl-cpp/blob/3b1b3a3e9335cda26c8b0797d8b8d24ac8e350ad/include/stl.hpp. +// Added to ClickHouse source code and not referenced as a submodule because its easier maintain and modify/customize. + +/*! 
+ * STL C++ v0.1.3 + * https://github.com/ankane/stl-cpp + * Unlicense OR MIT License + * + * Ported from https://www.netlib.org/a/stl + * + * Cleveland, R. B., Cleveland, W. S., McRae, J. E., & Terpenning, I. (1990). + * STL: A Seasonal-Trend Decomposition Procedure Based on Loess. + * Journal of Official Statistics, 6(1), 3-33. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace stl { + +bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, size_t nleft, size_t nright, float* w, bool userw, const float* rw) { + auto range = ((float) n) - 1.0; + auto h = std::max(xs - ((float) nleft), ((float) nright) - xs); + + if (len > n) { + h += (float) ((len - n) / 2); + } + + auto h9 = 0.999 * h; + auto h1 = 0.001 * h; + + // compute weights + auto a = 0.0; + for (auto j = nleft; j <= nright; j++) { + w[j - 1] = 0.0; + auto r = fabs(((float) j) - xs); + if (r <= h9) { + if (r <= h1) { + w[j - 1] = 1.0; + } else { + w[j - 1] = pow(1.0 - pow(r / h, 3), 3); + } + if (userw) { + w[j - 1] *= rw[j - 1]; + } + a += w[j - 1]; + } + } + + if (a <= 0.0) { + return false; + } else { // weighted least squares + for (auto j = nleft; j <= nright; j++) { // make sum of w(j) == 1 + w[j - 1] /= a; + } + + if (h > 0.0 && ideg > 0) { // use linear fit + auto a = 0.0; + for (auto j = nleft; j <= nright; j++) { // weighted center of x values + a += w[j - 1] * ((float) j); + } + auto b = xs - a; + auto c = 0.0; + for (auto j = nleft; j <= nright; j++) { + c += w[j - 1] * pow(((float) j) - a, 2); + } + if (sqrt(c) > 0.001 * range) { + b /= c; + + // points are spread out enough to compute slope + for (auto j = nleft; j <= nright; j++) { + w[j - 1] *= b * (((float) j) - a) + 1.0; + } + } + } + + *ys = 0.0; + for (auto j = nleft; j <= nright; j++) { + *ys += w[j - 1] * y[j - 1]; + } + + return true; + } +} + +void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool userw, const float* rw, float* ys, float* res) { + if (n < 2) { + ys[0] = y[0]; + return; + } + + size_t nleft = 0; + size_t nright = 0; + + auto newnj = std::min(njump, n - 1); + if (len >= n) { + nleft = 1; + nright = n; + for (size_t i = 1; i <= n; i += newnj) { + auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw); + if (!ok) { + ys[i - 1] = y[i - 1]; + } + } + } else if (newnj == 1) { // newnj equal to one, len less than n + auto nsh = (len + 1) / 2; + nleft = 1; + nright = len; + for (size_t i = 1; i <= n; i++) { // fitted value at i + if (i > nsh && nright != n) { + nleft += 1; + nright += 1; + } + auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw); + if (!ok) { + ys[i - 1] = y[i - 1]; + } + } + } else { // newnj greater than one, len less than n + auto nsh = (len + 1) / 2; + for (size_t i = 1; i <= n; i += newnj) { // fitted value at i + if (i < nsh) { + nleft = 1; + nright = len; + } else if (i >= n - nsh + 1) { + nleft = n - len + 1; + nright = n; + } else { + nleft = i - nsh + 1; + nright = len + i - nsh; + } + auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw); + if (!ok) { + ys[i - 1] = y[i - 1]; + } + } + } + + if (newnj != 1) { + for (size_t i = 1; i <= n - newnj; i += newnj) { + auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj); + for (auto j = i + 1; j <= i + newnj - 1; j++) { + ys[j - 1] = ys[i - 1] + delta * ((float) (j - i)); + } + } + auto k = ((n - 1) / newnj) * newnj + 1; + if (k != n) { + auto ok = est(y, n, len, ideg, (float) n, &ys[n - 1], 
nleft, nright, res, userw, rw); + if (!ok) { + ys[n - 1] = y[n - 1]; + } + if (k != n - 1) { + auto delta = (ys[n - 1] - ys[k - 1]) / ((float) (n - k)); + for (auto j = k + 1; j <= n - 1; j++) { + ys[j - 1] = ys[k - 1] + delta * ((float) (j - k)); + } + } + } + } +} + +void ma(const float* x, size_t n, size_t len, float* ave) { + auto newn = n - len + 1; + auto flen = (float) len; + auto v = 0.0; + + // get the first average + for (size_t i = 0; i < len; i++) { + v += x[i]; + } + + ave[0] = v / flen; + if (newn > 1) { + auto k = len; + auto m = 0; + for (size_t j = 1; j < newn; j++) { + // window down the array + v = v - x[m] + x[k]; + ave[j] = v / flen; + k += 1; + m += 1; + } + } +} + +void fts(const float* x, size_t n, size_t np, float* trend, float* work) { + ma(x, n, np, trend); + ma(trend, n - np + 1, np, work); + ma(work, n - 2 * np + 2, 3, trend); +} + +void rwts(const float* y, size_t n, const float* fit, float* rw) { + for (size_t i = 0; i < n; i++) { + rw[i] = fabs(y[i] - fit[i]); + } + + auto mid1 = (n - 1) / 2; + auto mid2 = n / 2; + + // sort + std::sort(rw, rw + n); + + auto cmad = 3.0 * (rw[mid1] + rw[mid2]); // 6 * median abs resid + auto c9 = 0.999 * cmad; + auto c1 = 0.001 * cmad; + + for (size_t i = 0; i < n; i++) { + auto r = fabs(y[i] - fit[i]); + if (r <= c1) { + rw[i] = 1.0; + } else if (r <= c9) { + rw[i] = pow(1.0 - pow(r / cmad, 2), 2); + } else { + rw[i] = 0.0; + } + } +} + +void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) { + for (size_t j = 1; j <= np; j++) { + size_t k = (n - j) / np + 1; + + for (size_t i = 1; i <= k; i++) { + work1[i - 1] = y[(i - 1) * np + j - 1]; + } + if (userw) { + for (size_t i = 1; i <= k; i++) { + work3[i - 1] = rw[(i - 1) * np + j - 1]; + } + } + ess(work1, k, ns, isdeg, nsjump, userw, work3, work2 + 1, work4); + auto xs = 0.0; + auto nright = std::min(ns, k); + auto ok = est(work1, k, ns, isdeg, xs, &work2[0], 1, nright, work4, userw, work3); + if (!ok) { + work2[0] = work2[1]; + } + xs = k + 1; + size_t nleft = std::max(1, (int) k - (int) ns + 1); + ok = est(work1, k, ns, isdeg, xs, &work2[k + 1], nleft, k, work4, userw, work3); + if (!ok) { + work2[k + 1] = work2[k]; + } + for (size_t m = 1; m <= k + 2; m++) { + season[(m - 1) * np + j - 1] = work2[m - 1]; + } + } +} + +void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) { + for (size_t j = 0; j < ni; j++) { + for (size_t i = 0; i < n; i++) { + work1[i] = y[i] - trend[i]; + } + + ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season); + fts(work2, n + 2 * np, np, work3, work1); + ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5); + for (size_t i = 0; i < n; i++) { + season[i] = work2[np + i] - work1[i]; + } + for (size_t i = 0; i < n; i++) { + work1[i] = y[i] - season[i]; + } + ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3); + } +} + +void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) { + if (ns < 3) { + throw std::invalid_argument("seasonal_length must be at least 3"); + } + if (nt < 3) { + throw 
std::invalid_argument("trend_length must be at least 3"); + } + if (nl < 3) { + throw std::invalid_argument("low_pass_length must be at least 3"); + } + if (np < 2) { + throw std::invalid_argument("period must be at least 2"); + } + + if (isdeg != 0 && isdeg != 1) { + throw std::invalid_argument("seasonal_degree must be 0 or 1"); + } + if (itdeg != 0 && itdeg != 1) { + throw std::invalid_argument("trend_degree must be 0 or 1"); + } + if (ildeg != 0 && ildeg != 1) { + throw std::invalid_argument("low_pass_degree must be 0 or 1"); + } + + if (ns % 2 != 1) { + throw std::invalid_argument("seasonal_length must be odd"); + } + if (nt % 2 != 1) { + throw std::invalid_argument("trend_length must be odd"); + } + if (nl % 2 != 1) { + throw std::invalid_argument("low_pass_length must be odd"); + } + + auto work1 = std::vector(n + 2 * np); + auto work2 = std::vector(n + 2 * np); + auto work3 = std::vector(n + 2 * np); + auto work4 = std::vector(n + 2 * np); + auto work5 = std::vector(n + 2 * np); + + auto userw = false; + size_t k = 0; + + while (true) { + onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data()); + k += 1; + if (k > no) { + break; + } + for (size_t i = 0; i < n; i++) { + work1[i] = trend[i] + season[i]; + } + rwts(y, n, work1.data(), rw); + userw = true; + } + + if (no <= 0) { + for (size_t i = 0; i < n; i++) { + rw[i] = 1.0; + } + } +} + +float var(const std::vector& series) { + auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size(); + std::vector tmp; + tmp.reserve(series.size()); + for (auto v : series) { + tmp.push_back(pow(v - mean, 2)); + } + return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1); +} + +float strength(const std::vector& component, const std::vector& remainder) { + std::vector sr; + sr.reserve(remainder.size()); + for (size_t i = 0; i < remainder.size(); i++) { + sr.push_back(component[i] + remainder[i]); + } + return std::max(0.0, 1.0 - var(remainder) / var(sr)); +} + +class StlResult { +public: + std::vector seasonal; + std::vector trend; + std::vector remainder; + std::vector weights; + + inline float seasonal_strength() { + return strength(seasonal, remainder); + } + + inline float trend_strength() { + return strength(trend, remainder); + } +}; + +class StlParams { + std::optional ns_ = std::nullopt; + std::optional nt_ = std::nullopt; + std::optional nl_ = std::nullopt; + int isdeg_ = 0; + int itdeg_ = 1; + std::optional ildeg_ = std::nullopt; + std::optional nsjump_ = std::nullopt; + std::optional ntjump_ = std::nullopt; + std::optional nljump_ = std::nullopt; + std::optional ni_ = std::nullopt; + std::optional no_ = std::nullopt; + bool robust_ = false; + +public: + inline StlParams seasonal_length(size_t ns) { + this->ns_ = ns; + return *this; + } + + inline StlParams trend_length(size_t nt) { + this->nt_ = nt; + return *this; + } + + inline StlParams low_pass_length(size_t nl) { + this->nl_ = nl; + return *this; + } + + inline StlParams seasonal_degree(int isdeg) { + this->isdeg_ = isdeg; + return *this; + } + + inline StlParams trend_degree(int itdeg) { + this->itdeg_ = itdeg; + return *this; + } + + inline StlParams low_pass_degree(int ildeg) { + this->ildeg_ = ildeg; + return *this; + } + + inline StlParams seasonal_jump(size_t nsjump) { + this->nsjump_ = nsjump; + return *this; + } + + inline StlParams trend_jump(size_t ntjump) { + this->ntjump_ = ntjump; + return *this; + } + + inline 
StlParams low_pass_jump(size_t nljump) { + this->nljump_ = nljump; + return *this; + } + + inline StlParams inner_loops(bool ni) { + this->ni_ = ni; + return *this; + } + + inline StlParams outer_loops(bool no) { + this->no_ = no; + return *this; + } + + inline StlParams robust(bool robust) { + this->robust_ = robust; + return *this; + } + + StlResult fit(const float* y, size_t n, size_t np); + StlResult fit(const std::vector& y, size_t np); +}; + +StlParams params() { + return StlParams(); +} + +StlResult StlParams::fit(const float* y, size_t n, size_t np) { + if (n < 2 * np) { + throw std::invalid_argument("series has less than two periods"); + } + + auto ns = this->ns_.value_or(np); + + auto isdeg = this->isdeg_; + auto itdeg = this->itdeg_; + + auto res = StlResult { + std::vector(n), + std::vector(n), + std::vector(), + std::vector(n) + }; + + auto ildeg = this->ildeg_.value_or(itdeg); + auto newns = std::max(ns, (size_t) 3); + if (newns % 2 == 0) { + newns += 1; + } + + auto newnp = std::max(np, (size_t) 2); + auto nt = (size_t) ceil((1.5 * newnp) / (1.0 - 1.5 / (float) newns)); + nt = this->nt_.value_or(nt); + nt = std::max(nt, (size_t) 3); + if (nt % 2 == 0) { + nt += 1; + } + + auto nl = this->nl_.value_or(newnp); + if (nl % 2 == 0 && !this->nl_.has_value()) { + nl += 1; + } + + auto ni = this->ni_.value_or(this->robust_ ? 1 : 2); + auto no = this->no_.value_or(this->robust_ ? 15 : 0); + + auto nsjump = this->nsjump_.value_or((size_t) ceil(((float) newns) / 10.0)); + auto ntjump = this->ntjump_.value_or((size_t) ceil(((float) nt) / 10.0)); + auto nljump = this->nljump_.value_or((size_t) ceil(((float) nl) / 10.0)); + + stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data()); + + res.remainder.reserve(n); + for (size_t i = 0; i < n; i++) { + res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]); + } + + return res; +} + +StlResult StlParams::fit(const std::vector& y, size_t np) { + return StlParams::fit(y.data(), y.size(), np); +} + +} diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp index a19c6770dad..a497b78a6c2 100644 --- a/src/IO/BrotliWriteBuffer.cpp +++ b/src/IO/BrotliWriteBuffer.cpp @@ -13,33 +13,14 @@ namespace ErrorCodes } -class BrotliWriteBuffer::BrotliStateWrapper +BrotliWriteBuffer::BrotliStateWrapper::BrotliStateWrapper() +: state(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr)) { -public: - BrotliStateWrapper() - : state(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr)) - { - } +} - ~BrotliStateWrapper() - { - BrotliEncoderDestroyInstance(state); - } - - BrotliEncoderState * state; -}; - -BrotliWriteBuffer::BrotliWriteBuffer(std::unique_ptr out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment) - : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) - , brotli(std::make_unique()) - , in_available(0) - , in_data(nullptr) - , out_capacity(0) - , out_data(nullptr) +BrotliWriteBuffer::BrotliStateWrapper::~BrotliStateWrapper() { - BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_QUALITY, static_cast(compression_level)); - // Set LZ77 window size. 
According to brotli sources default value is 24 (c/tools/brotli.c:81) - BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_LGWIN, 24); + BrotliEncoderDestroyInstance(state); } BrotliWriteBuffer::~BrotliWriteBuffer() = default; @@ -58,18 +39,20 @@ void BrotliWriteBuffer::nextImpl() { do { + const auto * in_data_ptr = in_data; out->nextIfAtEnd(); out_data = reinterpret_cast(out->position()); out_capacity = out->buffer().end() - out->position(); int result = BrotliEncoderCompressStream( brotli->state, - in_available ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH, + BROTLI_OPERATION_PROCESS, &in_available, &in_data, &out_capacity, &out_data, nullptr); + total_in += in_data - in_data_ptr; out->position() = out->buffer().end() - out_capacity; @@ -92,6 +75,10 @@ void BrotliWriteBuffer::finalizeBefore() { next(); + /// Don't write out if no data was ever compressed + if (!compress_empty && total_in == 0) + return; + while (true) { out->nextIfAtEnd(); diff --git a/src/IO/BrotliWriteBuffer.h b/src/IO/BrotliWriteBuffer.h index 8cbc78bd9e7..d4cda7b270c 100644 --- a/src/IO/BrotliWriteBuffer.h +++ b/src/IO/BrotliWriteBuffer.h @@ -4,18 +4,38 @@ #include #include +#include "config.h" + +#if USE_BROTLI +# include + namespace DB { + class BrotliWriteBuffer : public WriteBufferWithOwnMemoryDecorator { public: + template BrotliWriteBuffer( - std::unique_ptr out_, + WriteBufferT && out_, int compression_level, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + bool compress_empty_ = true) + : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) + , brotli(std::make_unique()) + , in_available(0) + , in_data(nullptr) + , out_capacity(0) + , out_data(nullptr) + , compress_empty(compress_empty_) + { + BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_QUALITY, static_cast(compression_level)); + // Set LZ77 window size. 
According to brotli sources default value is 24 (c/tools/brotli.c:81) + BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_LGWIN, 24); + } ~BrotliWriteBuffer() override; @@ -24,7 +44,15 @@ private: void finalizeBefore() override; - class BrotliStateWrapper; + class BrotliStateWrapper + { + public: + BrotliStateWrapper(); + ~BrotliStateWrapper(); + + BrotliEncoderState * state; + }; + std::unique_ptr brotli; @@ -33,6 +61,12 @@ private: size_t out_capacity; uint8_t * out_data; + +protected: + UInt64 total_in = 0; + bool compress_empty = true; }; } + +#endif diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index 7a59687fa56..4c0a467b155 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB diff --git a/src/IO/Bzip2WriteBuffer.cpp b/src/IO/Bzip2WriteBuffer.cpp index b84cbdd1e41..3421b4c3985 100644 --- a/src/IO/Bzip2WriteBuffer.cpp +++ b/src/IO/Bzip2WriteBuffer.cpp @@ -15,34 +15,22 @@ namespace ErrorCodes } -class Bzip2WriteBuffer::Bzip2StateWrapper +Bzip2WriteBuffer::Bzip2StateWrapper::Bzip2StateWrapper(int compression_level) { -public: - explicit Bzip2StateWrapper(int compression_level) - { - memset(&stream, 0, sizeof(stream)); + memset(&stream, 0, sizeof(stream)); - int ret = BZ2_bzCompressInit(&stream, compression_level, 0, 0); + int ret = BZ2_bzCompressInit(&stream, compression_level, 0, 0); - if (ret != BZ_OK) - throw Exception( - ErrorCodes::BZIP2_STREAM_ENCODER_FAILED, - "bzip2 stream encoder init failed: error code: {}", - ret); - } + if (ret != BZ_OK) + throw Exception( + ErrorCodes::BZIP2_STREAM_ENCODER_FAILED, + "bzip2 stream encoder init failed: error code: {}", + ret); +} - ~Bzip2StateWrapper() - { - BZ2_bzCompressEnd(&stream); - } - - bz_stream stream; -}; - -Bzip2WriteBuffer::Bzip2WriteBuffer(std::unique_ptr out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment) - : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) - , bz(std::make_unique(compression_level)) +Bzip2WriteBuffer::Bzip2StateWrapper::~Bzip2StateWrapper() { + BZ2_bzCompressEnd(&stream); } Bzip2WriteBuffer::~Bzip2WriteBuffer() = default; @@ -77,6 +65,8 @@ void Bzip2WriteBuffer::nextImpl() } while (bz->stream.avail_in > 0); + + total_in += offset(); } catch (...) 
{ @@ -90,6 +80,10 @@ void Bzip2WriteBuffer::finalizeBefore() { next(); + /// Don't write out if no data was ever compressed + if (!compress_empty && total_in == 0) + return; + out->nextIfAtEnd(); bz->stream.next_out = out->position(); bz->stream.avail_out = static_cast(out->buffer().end() - out->position()); diff --git a/src/IO/Bzip2WriteBuffer.h b/src/IO/Bzip2WriteBuffer.h index d0371903487..63c67461c6a 100644 --- a/src/IO/Bzip2WriteBuffer.h +++ b/src/IO/Bzip2WriteBuffer.h @@ -4,18 +4,29 @@ #include #include +#include "config.h" + +#if USE_BZIP2 +# include + namespace DB { class Bzip2WriteBuffer : public WriteBufferWithOwnMemoryDecorator { public: + template Bzip2WriteBuffer( - std::unique_ptr out_, + WriteBufferT && out_, int compression_level, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + bool compress_empty_ = true) + : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment), bz(std::make_unique(compression_level)) + , compress_empty(compress_empty_) + { + } ~Bzip2WriteBuffer() override; @@ -24,8 +35,20 @@ private: void finalizeBefore() override; - class Bzip2StateWrapper; + class Bzip2StateWrapper + { + public: + explicit Bzip2StateWrapper(int compression_level); + ~Bzip2StateWrapper(); + + bz_stream stream; + }; + std::unique_ptr bz; + bool compress_empty = true; + UInt64 total_in = 0; }; } + +#endif diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index 13e1adbb702..b8e1134d422 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -169,37 +169,68 @@ std::unique_ptr wrapReadBufferWithCompressionMethod( return createCompressedWrapper(std::move(nested), method, buf_size, existing_memory, alignment, zstd_window_log_max); } -std::unique_ptr wrapWriteBufferWithCompressionMethod( - std::unique_ptr nested, CompressionMethod method, int level, size_t buf_size, char * existing_memory, size_t alignment) + +template +std::unique_ptr createWriteCompressedWrapper( + WriteBufferT && nested, CompressionMethod method, int level, int zstd_window_log, size_t buf_size, char * existing_memory, size_t alignment, bool compress_empty) { if (method == DB::CompressionMethod::Gzip || method == CompressionMethod::Zlib) - return std::make_unique(std::move(nested), method, level, buf_size, existing_memory, alignment); + return std::make_unique(std::forward(nested), method, level, buf_size, existing_memory, alignment, compress_empty); #if USE_BROTLI if (method == DB::CompressionMethod::Brotli) - return std::make_unique(std::move(nested), level, buf_size, existing_memory, alignment); + return std::make_unique(std::forward(nested), level, buf_size, existing_memory, alignment, compress_empty); #endif if (method == CompressionMethod::Xz) - return std::make_unique(std::move(nested), level, buf_size, existing_memory, alignment); + return std::make_unique(std::forward(nested), level, buf_size, existing_memory, alignment, compress_empty); if (method == CompressionMethod::Zstd) - return std::make_unique(std::move(nested), level, buf_size, existing_memory, alignment); + return std::make_unique(std::forward(nested), level, zstd_window_log, buf_size, existing_memory, alignment, compress_empty); if (method == CompressionMethod::Lz4) - return std::make_unique(std::move(nested), level, buf_size, existing_memory, alignment); + return std::make_unique(std::forward(nested), level, buf_size, existing_memory, alignment, compress_empty); #if USE_BZIP2 if (method == 
CompressionMethod::Bzip2) - return std::make_unique(std::move(nested), level, buf_size, existing_memory, alignment); + return std::make_unique(std::forward(nested), level, buf_size, existing_memory, alignment, compress_empty); #endif #if USE_SNAPPY if (method == CompressionMethod::Snappy) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); #endif - if (method == CompressionMethod::None) - return nested; throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); } + +std::unique_ptr wrapWriteBufferWithCompressionMethod( + std::unique_ptr nested, + CompressionMethod method, + int level, + int zstd_window_log, + size_t buf_size, + char * existing_memory, + size_t alignment, + bool compress_empty) +{ + if (method == CompressionMethod::None) + return nested; + return createWriteCompressedWrapper(nested, method, level, zstd_window_log, buf_size, existing_memory, alignment, compress_empty); +} + + +std::unique_ptr wrapWriteBufferWithCompressionMethod( + WriteBuffer * nested, + CompressionMethod method, + int level, + int zstd_window_log, + size_t buf_size, + char * existing_memory, + size_t alignment, + bool compress_empty) +{ + assert(method != CompressionMethod::None); + return createWriteCompressedWrapper(nested, method, level, zstd_window_log, buf_size, existing_memory, alignment, compress_empty); +} + } diff --git a/src/IO/CompressionMethod.h b/src/IO/CompressionMethod.h index c142531cd05..c10c4901d41 100644 --- a/src/IO/CompressionMethod.h +++ b/src/IO/CompressionMethod.h @@ -61,13 +61,24 @@ std::unique_ptr wrapReadBufferWithCompressionMethod( char * existing_memory = nullptr, size_t alignment = 0); - std::unique_ptr wrapWriteBufferWithCompressionMethod( std::unique_ptr nested, CompressionMethod method, int level, + int zstd_window_log = 0, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + bool compress_empty = true); + +std::unique_ptr wrapWriteBufferWithCompressionMethod( + WriteBuffer * nested, + CompressionMethod method, + int level, + int zstd_window_log, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0, + bool compress_empty = true); } diff --git a/src/IO/LZMADeflatingWriteBuffer.cpp b/src/IO/LZMADeflatingWriteBuffer.cpp index a77b2bb7b39..db8f8c95fe6 100644 --- a/src/IO/LZMADeflatingWriteBuffer.cpp +++ b/src/IO/LZMADeflatingWriteBuffer.cpp @@ -7,9 +7,7 @@ namespace ErrorCodes extern const int LZMA_STREAM_ENCODER_FAILED; } -LZMADeflatingWriteBuffer::LZMADeflatingWriteBuffer( - std::unique_ptr out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment) - : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) +void LZMADeflatingWriteBuffer::initialize(int compression_level) { lstr = LZMA_STREAM_INIT; @@ -94,6 +92,10 @@ void LZMADeflatingWriteBuffer::finalizeBefore() { next(); + /// Don't write out if no data was ever compressed + if (!compress_empty && lstr.total_out == 0) + return; + do { out->nextIfAtEnd(); diff --git a/src/IO/LZMADeflatingWriteBuffer.h b/src/IO/LZMADeflatingWriteBuffer.h index 2e135455e00..797b85cd400 100644 --- a/src/IO/LZMADeflatingWriteBuffer.h +++ b/src/IO/LZMADeflatingWriteBuffer.h @@ -14,22 +14,32 @@ namespace DB class LZMADeflatingWriteBuffer : public WriteBufferWithOwnMemoryDecorator { public: + template LZMADeflatingWriteBuffer( - std::unique_ptr out_, + WriteBufferT && out_, int compression_level, size_t buf_size 
= DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + bool compress_empty_ = true) + : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment), compress_empty(compress_empty_) + { + initialize(compression_level); + } ~LZMADeflatingWriteBuffer() override; private: + void initialize(int compression_level); + void nextImpl() override; void finalizeBefore() override; void finalizeAfter() override; lzma_stream lstr; + + bool compress_empty = true; }; } diff --git a/src/IO/Lz4DeflatingWriteBuffer.cpp b/src/IO/Lz4DeflatingWriteBuffer.cpp index 8241bfd4f3c..a8cac823b50 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.cpp +++ b/src/IO/Lz4DeflatingWriteBuffer.cpp @@ -63,11 +63,8 @@ namespace ErrorCodes extern const int LZ4_ENCODER_FAILED; } -Lz4DeflatingWriteBuffer::Lz4DeflatingWriteBuffer( - std::unique_ptr out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment) - : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) - , tmp_memory(buf_size) +void Lz4DeflatingWriteBuffer::initialize(int compression_level) { kPrefs = { {LZ4F_max256KB, @@ -105,7 +102,7 @@ void Lz4DeflatingWriteBuffer::nextImpl() if (first_time) { - auto sink = SinkToOut(out.get(), tmp_memory, LZ4F_HEADER_SIZE_MAX); + auto sink = SinkToOut(out, tmp_memory, LZ4F_HEADER_SIZE_MAX); chassert(sink.getCapacity() >= LZ4F_HEADER_SIZE_MAX); /// write frame header and check for errors @@ -131,7 +128,7 @@ void Lz4DeflatingWriteBuffer::nextImpl() /// Ensure that there is enough space for compressed block of minimal size size_t min_compressed_block_size = LZ4F_compressBound(1, &kPrefs); - auto sink = SinkToOut(out.get(), tmp_memory, min_compressed_block_size); + auto sink = SinkToOut(out, tmp_memory, min_compressed_block_size); chassert(sink.getCapacity() >= min_compressed_block_size); /// LZ4F_compressUpdate compresses whole input buffer at once so we need to shink it manually @@ -163,8 +160,12 @@ void Lz4DeflatingWriteBuffer::finalizeBefore() { next(); + /// Don't write out if no data was ever compressed + if (!compress_empty && first_time) + return; + auto suffix_size = LZ4F_compressBound(0, &kPrefs); - auto sink = SinkToOut(out.get(), tmp_memory, suffix_size); + auto sink = SinkToOut(out, tmp_memory, suffix_size); chassert(sink.getCapacity() >= suffix_size); /// compression end diff --git a/src/IO/Lz4DeflatingWriteBuffer.h b/src/IO/Lz4DeflatingWriteBuffer.h index 7bb8a5e6c0e..b37d61fa732 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.h +++ b/src/IO/Lz4DeflatingWriteBuffer.h @@ -14,16 +14,26 @@ namespace DB class Lz4DeflatingWriteBuffer : public WriteBufferWithOwnMemoryDecorator { public: + template Lz4DeflatingWriteBuffer( - std::unique_ptr out_, + WriteBufferT && out_, int compression_level, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + bool compress_empty_ = true) + : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) + , tmp_memory(buf_size) + , compress_empty(compress_empty_) + { + initialize(compression_level); + } ~Lz4DeflatingWriteBuffer() override; private: + void initialize(int compression_level); + void nextImpl() override; void finalizeBefore() override; @@ -35,5 +45,6 @@ private: Memory<> tmp_memory; bool first_time = true; + bool compress_empty = true; }; } diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 
ff72dc5386c..d399721d060 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -99,6 +99,9 @@ bool ReadBufferFromPocoSocket::nextImpl() if (bytes_read < 0) throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket ({})", peer_address.toString()); + if (read_event != ProfileEvents::end()) + ProfileEvents::increment(read_event, bytes_read); + if (bytes_read) working_buffer.resize(bytes_read); else @@ -111,10 +114,17 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, : BufferWithOwnMemory(buf_size) , socket(socket_) , peer_address(socket.peerAddress()) + , read_event(ProfileEvents::end()) , socket_description("socket (" + peer_address.toString() + ")") { } +ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) + : ReadBufferFromPocoSocket(socket_, buf_size) +{ + read_event = read_event_; +} + bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const { if (available()) diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index dab4ac86295..76156612764 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -20,10 +20,13 @@ protected: */ Poco::Net::SocketAddress peer_address; + ProfileEvents::Event read_event; + bool nextImpl() override; public: explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); bool poll(size_t timeout_microseconds) const; diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index a65a82d9b40..b65de8d34a7 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -3,7 +3,6 @@ #if USE_AWS_S3 #include -#include #include #include #include @@ -15,7 +14,6 @@ #include -#include #include #include #include @@ -37,6 +35,9 @@ namespace ProfileEvents extern const Event DiskS3WriteRequestsErrors; extern const Event DiskS3ReadRequestsErrors; + + extern const Event S3Clients; + extern const Event TinyS3Clients; } namespace DB @@ -199,6 +200,8 @@ Client::Client( cache = std::make_shared(); ClientCacheRegistry::instance().registerClient(cache); + + ProfileEvents::increment(ProfileEvents::S3Clients); } Client::Client( @@ -219,6 +222,22 @@ Client::Client( { cache = std::make_shared(*other.cache); ClientCacheRegistry::instance().registerClient(cache); + + ProfileEvents::increment(ProfileEvents::TinyS3Clients); +} + + +Client::~Client() +{ + try + { + ClientCacheRegistry::instance().unregisterClient(cache.get()); + } + catch (...) + { + tryLogCurrentException(log); + throw; + } } Aws::Auth::AWSCredentials Client::getCredentials() const diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index b137f0605dc..677b739fd39 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -142,18 +142,7 @@ public: Client(Client && other) = delete; Client & operator=(Client &&) = delete; - ~Client() override - { - try - { - ClientCacheRegistry::instance().unregisterClient(cache.get()); - } - catch (...) - { - tryLogCurrentException(log); - throw; - } - } + ~Client() override; /// Returns the initial endpoint. 
const String & getInitialEndpoint() const { return initial_endpoint; } @@ -170,7 +159,7 @@ public: class RetryStrategy : public Aws::Client::RetryStrategy { public: - RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000); + explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000); /// NOLINTNEXTLINE(google-runtime-int) bool ShouldRetry(const Aws::Client::AWSError& error, long attemptedRetries) const override; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index a9dfd03a5e3..f2acda80adf 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -27,14 +28,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif #include static const int SUCCESS_RESPONSE_MIN = 200; diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index e990875dd2f..c5d92c6c0f2 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -6,18 +6,10 @@ #if USE_AWS_S3 #include #include +#include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 96ad6413ef5..5039059f522 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -6,21 +6,12 @@ #if USE_AWS_S3 -# include - -# include # include -# include - -# include -# include # include -# include # include -# include +# include # include -# include namespace ProfileEvents { @@ -147,6 +138,12 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const }; } +bool AuthSettings::hasUpdates(const AuthSettings & other) const +{ + AuthSettings copy = *this; + copy.updateFrom(other); + return *this != copy; +} void AuthSettings::updateFrom(const AuthSettings & from) { @@ -175,7 +172,7 @@ void AuthSettings::updateFrom(const AuthSettings & from) expiration_window_seconds = from.expiration_window_seconds; if (from.no_sign_request.has_value()) - no_sign_request = *from.no_sign_request; + no_sign_request = from.no_sign_request; } } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index ebfc07a3976..6ee8d96ed09 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -92,9 +92,11 @@ struct AuthSettings std::optional expiration_window_seconds; std::optional no_sign_request; - bool operator==(const AuthSettings & other) const = default; - + bool hasUpdates(const AuthSettings & other) const; void updateFrom(const AuthSettings & from); + +private: + bool operator==(const AuthSettings & other) const = default; }; } diff --git a/src/IO/WriteBufferDecorator.h b/src/IO/WriteBufferDecorator.h index 7c984eeea8d..ee47834b7af 100644 --- a/src/IO/WriteBufferDecorator.h +++ b/src/IO/WriteBufferDecorator.h @@ -12,13 +12,21 @@ class WriteBuffer; /// WriteBuffer that decorates data and delegates it to underlying buffer. /// It's used for writing compressed and encrypted data +/// This class can own or not own underlying buffer - constructor will differentiate +/// std::unique_ptr for owning and WriteBuffer* for not owning. template class WriteBufferDecorator : public Base { public: template explicit WriteBufferDecorator(std::unique_ptr out_, BaseArgs && ... 
args) - : Base(std::forward(args)...), out(std::move(out_)) + : Base(std::forward(args)...), owning_holder(std::move(out_)), out(owning_holder.get()) + { + } + + template + explicit WriteBufferDecorator(WriteBuffer * out_, BaseArgs && ... args) + : Base(std::forward(args)...), out(out_) { } @@ -38,7 +46,7 @@ public: } } - WriteBuffer * getNestedBuffer() { return out.get(); } + WriteBuffer * getNestedBuffer() { return out; } protected: /// Do some finalization before finalization of underlying buffer. @@ -47,7 +55,8 @@ protected: /// Do some finalization after finalization of underlying buffer. virtual void finalizeAfter() {} - std::unique_ptr out; + std::unique_ptr owning_holder; + WriteBuffer * out; }; using WriteBufferWithOwnMemoryDecorator = WriteBufferDecorator>; diff --git a/src/IO/WriteBufferFromEncryptedFile.cpp b/src/IO/WriteBufferFromEncryptedFile.cpp index 5bca0dc68d5..693f422c549 100644 --- a/src/IO/WriteBufferFromEncryptedFile.cpp +++ b/src/IO/WriteBufferFromEncryptedFile.cpp @@ -1,4 +1,5 @@ #include +#include #if USE_SSL @@ -21,7 +22,9 @@ WriteBufferFromEncryptedFile::WriteBufferFromEncryptedFile( WriteBufferFromEncryptedFile::~WriteBufferFromEncryptedFile() { - finalize(); + /// That destructor could be call with finalized=false in case of exceptions. + if (!finalized) + LOG_INFO(log, "WriteBufferFromEncryptedFile is not finalized in destructor"); } void WriteBufferFromEncryptedFile::finalizeBefore() diff --git a/src/IO/WriteBufferFromEncryptedFile.h b/src/IO/WriteBufferFromEncryptedFile.h index 25dd54ca9d5..c6edcf76533 100644 --- a/src/IO/WriteBufferFromEncryptedFile.h +++ b/src/IO/WriteBufferFromEncryptedFile.h @@ -28,7 +28,7 @@ public: void sync() override; - std::string getFileName() const override { return assert_cast(out.get())->getFileName(); } + std::string getFileName() const override { return assert_cast(out)->getFileName(); } private: void nextImpl() override; @@ -39,6 +39,8 @@ private: bool flush_header = false; FileEncryption::Encryptor encryptor; + + Poco::Logger * log = &Poco::Logger::get("WriteBufferFromEncryptedFile"); }; } diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 171e7f1ce69..10d9fd131cd 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -34,6 +34,97 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +ssize_t WriteBufferFromPocoSocket::socketSendBytesImpl(const char * ptr, size_t size) +{ + ssize_t res = 0; + + /// If async_callback is specified, set socket to non-blocking mode + /// and try to write data to it, if socket is not ready for writing, + /// run async_callback and try again later. + /// It is expected that file descriptor may be polled externally. + /// Note that send timeout is not checked here. External code should check it while polling. + if (async_callback) + { + socket.setBlocking(false); + /// Set socket to blocking mode at the end. + SCOPE_EXIT(socket.setBlocking(true)); + bool secure = socket.secure(); + res = socket.impl()->sendBytes(ptr, static_cast(size)); + + /// Check EAGAIN and ERR_SSL_WANT_WRITE/ERR_SSL_WANT_READ for secure socket (writing to secure socket can read too). + while (res < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(res) || checkSSLWantWrite(res))))) + { + /// In case of ERR_SSL_WANT_READ we should wait for socket to be ready for reading, otherwise - for writing. 
+ if (secure && checkSSLWantRead(res)) + async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR); + else + async_callback(socket.impl()->sockfd(), socket.getSendTimeout(), AsyncEventTimeoutType::SEND, socket_description, AsyncTaskExecutor::Event::WRITE | AsyncTaskExecutor::Event::ERROR); + + /// Try to write again. + res = socket.impl()->sendBytes(ptr, static_cast(size)); + } + } + else + { + res = socket.impl()->sendBytes(ptr, static_cast(size)); + } + + return res; +} + +void WriteBufferFromPocoSocket::socketSendBytes(const char * ptr, size_t size) +{ + if (!size) + return; + + Stopwatch watch; + size_t bytes_written = 0; + + SCOPE_EXIT({ + ProfileEvents::increment(ProfileEvents::NetworkSendElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::NetworkSendBytes, bytes_written); + if (write_event != ProfileEvents::end()) + ProfileEvents::increment(write_event, bytes_written); + }); + + while (bytes_written < size) + { + ssize_t res = 0; + + /// Add more details to exceptions. + try + { + CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkSend); + if (size > INT_MAX) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); + + res = socketSendBytesImpl(ptr + bytes_written, size - bytes_written); + } + catch (const Poco::Net::NetException & e) + { + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({} -> {})", e.displayText(), + our_address.toString(), peer_address.toString()); + } + catch (const Poco::TimeoutException &) + { + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while writing to socket ({}, {} ms)", + peer_address.toString(), + socket.impl()->getSendTimeout().totalMilliseconds()); + } + catch (const Poco::IOException & e) + { + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({} -> {})", e.displayText(), + our_address.toString(), peer_address.toString()); + } + + if (res < 0) + throw NetException(ErrorCodes::CANNOT_WRITE_TO_SOCKET, "Cannot write to socket ({} -> {})", + our_address.toString(), peer_address.toString()); + + bytes_written += res; + } +} + void WriteBufferFromPocoSocket::nextImpl() { if (!offset()) @@ -60,36 +151,7 @@ void WriteBufferFromPocoSocket::nextImpl() if (size > INT_MAX) throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); - /// If async_callback is specified, set socket to non-blocking mode - /// and try to write data to it, if socket is not ready for writing, - /// run async_callback and try again later. - /// It is expected that file descriptor may be polled externally. - /// Note that send timeout is not checked here. External code should check it while polling. - if (async_callback) - { - socket.setBlocking(false); - /// Set socket to blocking mode at the end. - SCOPE_EXIT(socket.setBlocking(true)); - bool secure = socket.secure(); - res = socket.impl()->sendBytes(pos, static_cast(size)); - - /// Check EAGAIN and ERR_SSL_WANT_WRITE/ERR_SSL_WANT_READ for secure socket (writing to secure socket can read too). - while (res < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(res) || checkSSLWantWrite(res))))) - { - /// In case of ERR_SSL_WANT_READ we should wait for socket to be ready for reading, otherwise - for writing. 
- if (secure && checkSSLWantRead(res)) - async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR); - else - async_callback(socket.impl()->sockfd(), socket.getSendTimeout(), AsyncEventTimeoutType::SEND, socket_description, AsyncTaskExecutor::Event::WRITE | AsyncTaskExecutor::Event::ERROR); - - /// Try to write again. - res = socket.impl()->sendBytes(pos, static_cast(size)); - } - } - else - { - res = socket.impl()->sendBytes(pos, static_cast(size)); - } + res = socketSendBytesImpl(pos, size); } catch (const Poco::Net::NetException & e) { @@ -125,6 +187,12 @@ WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_ { } +WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size) + : WriteBufferFromPocoSocket(socket_, buf_size) +{ + write_event = write_event_; +} + WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket() { try diff --git a/src/IO/WriteBufferFromPocoSocket.h b/src/IO/WriteBufferFromPocoSocket.h index ecb61020357..9c5509aebd1 100644 --- a/src/IO/WriteBufferFromPocoSocket.h +++ b/src/IO/WriteBufferFromPocoSocket.h @@ -17,14 +17,33 @@ class WriteBufferFromPocoSocket : public BufferWithOwnMemory { public: explicit WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); ~WriteBufferFromPocoSocket() override; void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } + using WriteBuffer::write; + void write(const std::string & str) { WriteBuffer::write(str.c_str(), str.size()); } + void write(std::string_view str) { WriteBuffer::write(str.data(), str.size()); } + void write(const char * str) { WriteBuffer::write(str, strlen(str)); } + void writeln(const std::string & str) { write(str); WriteBuffer::write("\n", 1); } + void writeln(std::string_view str) { write(str); WriteBuffer::write("\n", 1); } + void writeln(const char * str) { write(str); WriteBuffer::write("\n", 1); } + protected: void nextImpl() override; + void socketSendBytes(const char * ptr, size_t size); + void socketSendStr(const std::string & str) + { + return socketSendBytes(str.data(), str.size()); + } + void socketSendStr(const char * ptr) + { + return socketSendBytes(ptr, strlen(ptr)); + } + Poco::Net::Socket & socket; /** For error messages. It is necessary to receive this address in advance, because, @@ -34,9 +53,13 @@ protected: Poco::Net::SocketAddress peer_address; Poco::Net::SocketAddress our_address; + ProfileEvents::Event write_event; + private: AsyncCallback async_callback; std::string socket_description; + + ssize_t socketSendBytesImpl(const char * ptr, size_t size); }; } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 094352638e6..b4f8b476b11 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -63,9 +63,7 @@ namespace ErrorCodes inline void writeChar(char x, WriteBuffer & buf) { - buf.nextIfAtEnd(); - *buf.position() = x; - ++buf.position(); + buf.write(x); } /// Write the same character n times. 
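The `WriteBufferFromPocoSocket` additions above expose plain string helpers (`write`, `writeln`, `socketSendStr`) on top of the buffered socket writer, with the actual send loop factored into `socketSendBytesImpl`. The sketch below is a hypothetical illustration of how those helpers might be used; the responder function, the headers it emits and the HTTP-flavoured payload are assumptions made for the example, and only the `write()`, `writeln()` and `finalize()` calls come from the class shown above.

``` cpp
/// Hypothetical sketch (not part of this patch): a plain-text responder built on the
/// new string helpers of WriteBufferFromPocoSocket. The function and its protocol are
/// illustrative; only write(), writeln() and finalize() are taken from the change above.
#include <string_view>
#include <IO/WriteBufferFromPocoSocket.h>
#include <Poco/Net/StreamSocket.h>

namespace DB
{

void sendPlainTextReply(Poco::Net::StreamSocket & socket, std::string_view body)
{
    WriteBufferFromPocoSocket out(socket);
    out.writeln("HTTP/1.0 200 OK");           /// writeln() appends '\n' after the string
    out.writeln("Content-Type: text/plain");
    out.writeln("");                          /// blank line ends the header block
    out.write(body);                          /// write(std::string_view) forwards to WriteBuffer::write
    out.finalize();                           /// pushes any buffered bytes through nextImpl()
}

}
```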
diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp index 6e4ab742413..ab6763fe6a6 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.cpp +++ b/src/IO/ZlibDeflatingWriteBuffer.cpp @@ -10,36 +10,6 @@ namespace ErrorCodes extern const int ZLIB_DEFLATE_FAILED; } - -ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer( - std::unique_ptr out_, - CompressionMethod compression_method, - int compression_level, - size_t buf_size, - char * existing_memory, - size_t alignment) - : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) -{ - zstr.zalloc = nullptr; - zstr.zfree = nullptr; - zstr.opaque = nullptr; - zstr.next_in = nullptr; - zstr.avail_in = 0; - zstr.next_out = nullptr; - zstr.avail_out = 0; - - int window_bits = 15; - if (compression_method == CompressionMethod::Gzip) - { - window_bits += 16; - } - - int rc = deflateInit2(&zstr, compression_level, Z_DEFLATED, window_bits, 8, Z_DEFAULT_STRATEGY); - - if (rc != Z_OK) - throw Exception(ErrorCodes::ZLIB_DEFLATE_FAILED, "deflateInit2 failed: {}; zlib version: {}", zError(rc), ZLIB_VERSION); -} - void ZlibDeflatingWriteBuffer::nextImpl() { if (!offset()) @@ -82,6 +52,10 @@ void ZlibDeflatingWriteBuffer::finalizeBefore() { next(); + /// Don't write out if no data was ever compressed + if (!compress_empty && zstr.total_out == 0) + return; + /// https://github.com/zlib-ng/zlib-ng/issues/494 do { diff --git a/src/IO/ZlibDeflatingWriteBuffer.h b/src/IO/ZlibDeflatingWriteBuffer.h index 58e709b54e6..f01c41c7d13 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.h +++ b/src/IO/ZlibDeflatingWriteBuffer.h @@ -12,17 +12,45 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ZLIB_DEFLATE_FAILED; +} + /// Performs compression using zlib library and writes compressed data to out_ WriteBuffer. 
class ZlibDeflatingWriteBuffer : public WriteBufferWithOwnMemoryDecorator { public: + template ZlibDeflatingWriteBuffer( - std::unique_ptr out_, + WriteBufferT && out_, CompressionMethod compression_method, int compression_level, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + bool compress_empty_ = true) + : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment), compress_empty(compress_empty_) + { + zstr.zalloc = nullptr; + zstr.zfree = nullptr; + zstr.opaque = nullptr; + zstr.next_in = nullptr; + zstr.avail_in = 0; + zstr.next_out = nullptr; + zstr.avail_out = 0; + + int window_bits = 15; + if (compression_method == CompressionMethod::Gzip) + { + window_bits += 16; + } + + int rc = deflateInit2(&zstr, compression_level, Z_DEFLATED, window_bits, 8, Z_DEFAULT_STRATEGY); + + if (rc != Z_OK) + throw Exception(ErrorCodes::ZLIB_DEFLATE_FAILED, "deflateInit2 failed: {}; zlib version: {}", zError(rc), ZLIB_VERSION); + } ~ZlibDeflatingWriteBuffer() override; @@ -36,6 +64,7 @@ private: virtual void finalizeAfter() override; z_stream zstr; + bool compress_empty = true; }; } diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp index 949d65926b3..b014f7e59a4 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingWriteBuffer.cpp @@ -1,30 +1,49 @@ #include #include +#include namespace DB { namespace ErrorCodes { extern const int ZSTD_ENCODER_FAILED; + extern const int ILLEGAL_CODEC_PARAMETER; } -ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer( - std::unique_ptr out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment) - : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment) +static void setZstdParameter(ZSTD_CCtx * cctx, ZSTD_cParameter param, int value) +{ + auto ret = ZSTD_CCtx_setParameter(cctx, param, value); + if (ZSTD_isError(ret)) + throw Exception( + ErrorCodes::ZSTD_ENCODER_FAILED, + "zstd stream encoder option setting failed: error code: {}; zstd version: {}", + ret, + ZSTD_VERSION_STRING); +} + +void ZstdDeflatingWriteBuffer::initialize(int compression_level, int window_log) { cctx = ZSTD_createCCtx(); if (cctx == nullptr) throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING); - size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level); - if (ZSTD_isError(ret)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, - "zstd stream encoder option setting failed: error code: {}; zstd version: {}", - ret, ZSTD_VERSION_STRING); - ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); - if (ZSTD_isError(ret)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, - "zstd stream encoder option setting failed: error code: {}; zstd version: {}", - ret, ZSTD_VERSION_STRING); + setZstdParameter(cctx, ZSTD_c_compressionLevel, compression_level); + + if (window_log > 0) + { + ZSTD_bounds window_log_bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog); + if (ZSTD_isError(window_log_bounds.error)) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "ZSTD windowLog parameter is not supported {}", + std::string(ZSTD_getErrorName(window_log_bounds.error))); + if (window_log > window_log_bounds.upperBound || window_log < window_log_bounds.lowerBound) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, + "ZSTD codec can't have window log more than {} and lower than {}, given {}", + 
toString(window_log_bounds.upperBound), + toString(window_log_bounds.lowerBound), toString(window_log)); + setZstdParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1); + setZstdParameter(cctx, ZSTD_c_windowLog, window_log); + } + + setZstdParameter(cctx, ZSTD_c_checksumFlag, 1); input = {nullptr, 0, 0}; output = {nullptr, 0, 0}; @@ -44,6 +63,7 @@ void ZstdDeflatingWriteBuffer::flush(ZSTD_EndDirective mode) try { + size_t out_offset = out->offset(); bool ended = false; do { @@ -67,6 +87,8 @@ void ZstdDeflatingWriteBuffer::flush(ZSTD_EndDirective mode) ended = everything_was_compressed && everything_was_flushed; } while (!ended); + + total_out += out->offset() - out_offset; } catch (...) { @@ -84,6 +106,9 @@ void ZstdDeflatingWriteBuffer::nextImpl() void ZstdDeflatingWriteBuffer::finalizeBefore() { + /// Don't write out if no data was ever compressed + if (!compress_empty && total_out == 0) + return; flush(ZSTD_e_end); } diff --git a/src/IO/ZstdDeflatingWriteBuffer.h b/src/IO/ZstdDeflatingWriteBuffer.h index a66d6085a74..15c3869062f 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.h +++ b/src/IO/ZstdDeflatingWriteBuffer.h @@ -14,12 +14,19 @@ namespace DB class ZstdDeflatingWriteBuffer : public WriteBufferWithOwnMemoryDecorator { public: + template ZstdDeflatingWriteBuffer( - std::unique_ptr out_, + WriteBufferT && out_, int compression_level, + int window_log = 0, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + bool compress_empty_ = true) + : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment), compress_empty(compress_empty_) + { + initialize(compression_level, window_log); + } ~ZstdDeflatingWriteBuffer() override; @@ -29,6 +36,8 @@ public: } private: + void initialize(int compression_level, int window_log); + void nextImpl() override; /// Flush all pending data and write zstd footer to the underlying buffer. 
@@ -42,6 +51,9 @@ private: ZSTD_CCtx * cctx; ZSTD_inBuffer input; ZSTD_outBuffer output; + + size_t total_out = 0; + bool compress_empty = true; }; } diff --git a/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp index b62f3a8b0bd..56608644425 100644 --- a/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -160,4 +161,13 @@ void InterpreterCreateQuotaQuery::updateQuotaFromQuery(Quota & quota, const ASTC updateQuotaFromQueryImpl(quota, query, {}, {}); } +void registerInterpreterCreateQuotaQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateQuotaQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp index fef1f285c8b..4936bd15262 100644 --- a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -122,4 +123,14 @@ void InterpreterCreateRoleQuery::updateRoleFromQuery(Role & role, const ASTCreat { updateRoleFromQueryImpl(role, query, {}, {}); } + +void registerInterpreterCreateRoleQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateRoleQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp index e4593222f6d..b48c3880c59 100644 --- a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -148,4 +149,13 @@ AccessRightsElements InterpreterCreateRowPolicyQuery::getRequiredAccess() const return res; } +void registerInterpreterCreateRowPolicyQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateRowPolicyQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp index 3a96c0a96ff..029deff9b22 100644 --- a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -138,4 +139,14 @@ void InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(Setti { updateSettingsProfileFromQueryImpl(SettingsProfile, query, {}, {}, {}); } + +void registerInterpreterCreateSettingsProfileQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateSettingsProfileQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 00e21f27d2e..32c51b745c7 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ 
b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -261,4 +262,13 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true); } +void registerInterpreterCreateUserQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateUserQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp index 371ed248306..612000c3480 100644 --- a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -95,4 +96,13 @@ AccessRightsElements InterpreterDropAccessEntityQuery::getRequiredAccess() const throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by DROP query", toString(query.type)); } +void registerInterpreterDropAccessEntityQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterDropAccessEntityQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 259c6b39524..0f2d65abb5e 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -480,4 +481,13 @@ void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery updateFromQuery(role, query); } +void registerInterpreterGrantQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterGrantQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.cpp index 49e90783a59..a05fc86f866 100644 --- a/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterMoveAccessEntityQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -90,4 +91,13 @@ AccessRightsElements InterpreterMoveAccessEntityQuery::getRequiredAccess() const throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by DROP query", toString(query.type)); } +void registerInterpreterMoveAccessEntityQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterMoveAccessEntityQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp index 69740f90d4c..24467923542 100644 --- a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -90,4 +91,13 @@ void InterpreterSetRoleQuery::updateUserSetDefaultRoles(User & user, const Roles user.default_roles = roles_from_query; } +void 
registerInterpreterSetRoleQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterSetRoleQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp index bffb47ac714..76979ed86c8 100644 --- a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -125,4 +126,13 @@ String InterpreterShowAccessEntitiesQuery::getRewrittenQuery() const (order.empty() ? "" : " ORDER BY " + order); } +void registerInterpreterShowAccessEntitiesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowAccessEntitiesQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp index e9862e99393..23ce66fe663 100644 --- a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -80,4 +81,13 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const return result; } +void registerInterpreterShowAccessQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowAccessQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index ec2e60b2ef7..a55588baeaa 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -420,4 +421,14 @@ AccessRightsElements InterpreterShowCreateAccessEntityQuery::getRequiredAccess() } throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by SHOW CREATE query", toString(show_query.type)); } + +void registerInterpreterShowCreateAccessEntityQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowCreateAccessEntityQuery", create_fn); +} + } diff --git a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp index 56fbb34a577..ba96dafbf0d 100644 --- a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -198,4 +199,13 @@ ASTs InterpreterShowGrantsQuery::getAttachGrantQueries(const IAccessEntity & use return getGrantQueriesImpl(user_or_role, nullptr, true); } +void registerInterpreterShowGrantsQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowGrantsQuery", create_fn); +} + } diff --git 
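The hunks above add one registerInterpreterXxx function per access interpreter; each builds a creator lambda and registers it with the factory under the interpreter's name. In this rendering the text inside angle brackets has been stripped, so the added #include lines and the make_unique calls appear without their targets. A minimal sketch of one such registration with the template argument restored as an assumption (it presumably matches the interpreter named in the surrounding function):

    void registerInterpreterShowGrantsQuery(InterpreterFactory & factory)
    {
        auto create_fn = [] (const InterpreterFactory::Arguments & args)
        {
            // Assumed: the stripped template argument is the interpreter itself.
            return std::make_unique<InterpreterShowGrantsQuery>(args.query, args.context);
        };
        factory.registerInterpreter("InterpreterShowGrantsQuery", create_fn);
    }
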
a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp index 1a0b441a06d..42b7fc51022 100644 --- a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -15,4 +16,14 @@ BlockIO InterpreterShowPrivilegesQuery::execute() return executeQuery("SELECT * FROM system.privileges", context, QueryFlags{ .internal = true }).second; } +void registerInterpreterShowPrivilegesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowPrivilegesQuery", create_fn); +} + + } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index cdc4292a79c..0d80adb6313 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2856,7 +2856,7 @@ void NO_INLINE Aggregator::mergeBucketImpl( } } -ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const +ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedDataVariants && data_variants) const { if (data_variants.empty()) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Empty data passed to Aggregator::prepareVariantsToMerge."); diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 6fc3ac2f6d6..f4aa78043ca 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1230,7 +1230,7 @@ public: */ BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; - ManyAggregatedDataVariants prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const; + ManyAggregatedDataVariants prepareVariantsToMerge(ManyAggregatedDataVariants && data_variants) const; using BucketToBlocks = std::map; /// Merge partially aggregated blocks separated to buckets into one data structure. 
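The Aggregator change above makes prepareVariantsToMerge take its ManyAggregatedDataVariants by rvalue reference, so call sites now hand over ownership explicitly. A hypothetical call site (the aggregator object and the collect helper are assumptions, not part of the patch):

    ManyAggregatedDataVariants data_variants = collectPerThreadVariants();  // hypothetical helper
    ManyAggregatedDataVariants prepared = aggregator.prepareVariantsToMerge(std::move(data_variants));
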
diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index 9f7d0176b45..9034f582869 100644 --- a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -15,7 +15,7 @@ namespace DB { -NamesAndTypesList AsynchronousInsertLogElement::getNamesAndTypes() +ColumnsDescription AsynchronousInsertLogElement::getColumnsDescription() { auto type_status = std::make_shared( DataTypeEnum8::Values @@ -32,7 +32,7 @@ NamesAndTypesList AsynchronousInsertLogElement::getNamesAndTypes() {"Preprocessed", static_cast(DataKind::Preprocessed)}, }); - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/AsynchronousInsertLog.h b/src/Interpreters/AsynchronousInsertLog.h index 3a93b29dabe..d05375002ad 100644 --- a/src/Interpreters/AsynchronousInsertLog.h +++ b/src/Interpreters/AsynchronousInsertLog.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -39,10 +40,9 @@ struct AsynchronousInsertLogElement String flush_query_id; static std::string name() { return "AsynchronousInsertLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class AsynchronousInsertLog : public SystemLog diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index a750388d38f..f905f72e7a7 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -6,21 +6,49 @@ #include #include #include +#include +#include #include namespace DB { -NamesAndTypesList AsynchronousMetricLogElement::getNamesAndTypes() +ColumnsDescription AsynchronousMetricLogElement::getColumnsDescription() { - return + ParserCodec codec_parser; + return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"metric", std::make_shared(std::make_shared())}, - {"value", std::make_shared(),} + { + "hostname", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + "Hostname of the server executing the query." + }, + { + "event_date", + std::make_shared(), + parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + "Event date." + }, + { + "event_time", + std::make_shared(), + parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + "Event time." + }, + { + "metric", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + "Metric name." + }, + { + "value", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + "Metric value." 
+ } }; } diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 9a5266cee6e..0be8dbc2ec8 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -25,7 +26,7 @@ struct AsynchronousMetricLogElement double value; static std::string name() { return "AsynchronousMetricLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index e49bb28bd45..d34e982ffc3 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -18,9 +18,9 @@ BackupLogElement::BackupLogElement(BackupOperationInfo info_) { } -NamesAndTypesList BackupLogElement::getNamesAndTypes() +ColumnsDescription BackupLogElement::getColumnsDescription() { - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/BackupLog.h b/src/Interpreters/BackupLog.h index 283b74f68ba..626bd55726d 100644 --- a/src/Interpreters/BackupLog.h +++ b/src/Interpreters/BackupLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -25,10 +26,9 @@ struct BackupLogElement BackupOperationInfo info{}; static std::string name() { return "BackupLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class BackupLog : public SystemLog diff --git a/src/Interpreters/BlobStorageLog.cpp b/src/Interpreters/BlobStorageLog.cpp index 3eb3ac40b85..520405374ca 100644 --- a/src/Interpreters/BlobStorageLog.cpp +++ b/src/Interpreters/BlobStorageLog.cpp @@ -12,7 +12,7 @@ namespace DB { -NamesAndTypesList BlobStorageLogElement::getNamesAndTypes() +ColumnsDescription BlobStorageLogElement::getColumnsDescription() { auto event_enum_type = std::make_shared( DataTypeEnum8::Values{ @@ -24,7 +24,8 @@ NamesAndTypesList BlobStorageLogElement::getNamesAndTypes() {"MultiPartUploadAbort", static_cast(EventType::MultiPartUploadAbort)}, }); - return { + return ColumnsDescription + { {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, {"event_time_microseconds", std::make_shared(6)}, @@ -47,46 +48,22 @@ NamesAndTypesList BlobStorageLogElement::getNamesAndTypes() void BlobStorageLogElement::appendToBlock(MutableColumns & columns) const { -#ifndef NDEBUG - auto coulumn_names = BlobStorageLogElement::getNamesAndTypes().getNames(); -#endif - size_t i = 0; auto event_time_seconds = timeInSeconds(event_time); - assert(coulumn_names.at(i) == "event_date"); columns[i++]->insert(DateLUT::instance().toDayNum(event_time_seconds).toUnderType()); - assert(coulumn_names.at(i) == "event_time"); columns[i++]->insert(event_time_seconds); - assert(coulumn_names.at(i) == "event_time_microseconds"); columns[i++]->insert(Decimal64(timeInMicroseconds(event_time))); - - assert(coulumn_names.at(i) == "event_type"); columns[i++]->insert(static_cast(event_type)); - - assert(coulumn_names.at(i) == "query_id"); columns[i++]->insert(query_id); - assert(coulumn_names.at(i) == "thread_id"); columns[i++]->insert(thread_id); - 
assert(coulumn_names.at(i) == "thread_name"); columns[i++]->insert(thread_name); - - assert(coulumn_names.at(i) == "disk_name"); columns[i++]->insert(disk_name); - assert(coulumn_names.at(i) == "bucket"); columns[i++]->insert(bucket); - assert(coulumn_names.at(i) == "remote_path"); columns[i++]->insert(remote_path); - assert(coulumn_names.at(i) == "local_path"); columns[i++]->insert(local_path); - assert(coulumn_names.at(i) == "data_size"); columns[i++]->insert(data_size); - - assert(coulumn_names.at(i) == "error"); columns[i++]->insert(error_message); - - assert(i == coulumn_names.size() && columns.size() == coulumn_names.size()); } } - diff --git a/src/Interpreters/BlobStorageLog.h b/src/Interpreters/BlobStorageLog.h index 460d54a386d..aa57ee44c0f 100644 --- a/src/Interpreters/BlobStorageLog.h +++ b/src/Interpreters/BlobStorageLog.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -42,10 +43,9 @@ struct BlobStorageLogElement static std::string name() { return "BlobStorageLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index eafd7aafb29..8fb185038fc 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -1,8 +1,9 @@ #pragma once +#include +#include #include #include -#include namespace Poco { namespace Util { class AbstractConfiguration; } } // NOLINT(cppcoreguidelines-virtual-class-destructor) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 8347d32bd3c..151f2ea06cc 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -129,12 +129,14 @@ String queryStringFromAST(ASTPtr ast) QueryCache::Key::Key( ASTPtr ast_, Block header_, - const String & user_name_, bool is_shared_, + std::optional user_id_, const std::vector & current_user_roles_, + bool is_shared_, std::chrono::time_point expires_at_, bool is_compressed_) : ast(removeQueryCacheSettings(ast_)) , header(header_) - , user_name(user_name_) + , user_id(user_id_) + , current_user_roles(current_user_roles_) , is_shared(is_shared_) , expires_at(expires_at_) , is_compressed(is_compressed_) @@ -142,8 +144,8 @@ QueryCache::Key::Key( { } -QueryCache::Key::Key(ASTPtr ast_, const String & user_name_) - : QueryCache::Key(ast_, {}, user_name_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name +QueryCache::Key::Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_) + : QueryCache::Key(ast_, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name { } @@ -401,7 +403,9 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar const auto & entry_key = entry->key; const auto & entry_mapped = entry->mapped; - if (!entry_key.is_shared && entry_key.user_name != key.user_name) + const bool is_same_user_id = ((!entry_key.user_id.has_value() && !key.user_id.has_value()) || (entry_key.user_id.has_value() && key.user_id.has_value() && *entry_key.user_id == *key.user_id)); + const bool is_same_current_user_roles = (entry_key.current_user_roles == 
key.current_user_roles); + if (!entry_key.is_shared && (!is_same_user_id || !is_same_current_user_roles)) { LOG_TRACE(logger, "Inaccessible query result found for query {}", doubleQuoteString(key.query_string)); return; @@ -503,7 +507,9 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis /// Update the per-user cache quotas with the values stored in the query context. This happens per query which writes into the query /// cache. Obviously, this is overkill but I could find the good place to hook into which is called when the settings profiles in /// users.xml change. - cache.setQuotaForUser(key.user_name, max_query_cache_size_in_bytes_quota, max_query_cache_entries_quota); + /// user_id == std::nullopt is the internal user for which no quota can be configured + if (key.user_id.has_value()) + cache.setQuotaForUser(*key.user_id, max_query_cache_size_in_bytes_quota, max_query_cache_entries_quota); std::lock_guard lock(mutex); return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size); diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index d3c98dbd97a..2dd4887dd20 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -4,9 +4,12 @@ #include #include #include -#include #include +#include #include +#include + +#include namespace DB { @@ -51,8 +54,15 @@ public: /// Result metadata for constructing the pipe. const Block header; - /// The user who executed the query. - const String user_name; + /// The id and current roles of the user who executed the query. + /// These members are necessary to ensure that a (non-shared, see below) entry can only be written and read by the same user with + /// the same roles. Example attack scenarios: + /// - after DROP USER, it must not be possible to create a new user with with the dropped user name and access the dropped user's + /// query cache entries + /// - different roles of the same user may be tied to different row-level policies. It must not be possible to switch role and + /// access another role's cache entries + std::optional user_id; + std::vector current_user_roles; /// If the associated entry can be read by other users. In general, sharing is a bad idea: First, it is unlikely that different /// users pose the same queries. Second, sharing potentially breaches security. E.g. User A should not be able to bypass row @@ -74,12 +84,13 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, Block header_, - const String & user_name_, bool is_shared_, + std::optional user_id_, const std::vector & current_user_roles_, + bool is_shared_, std::chrono::time_point expires_at_, bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). 
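The QueryCache::Key now carries the user id and the current roles instead of a user name, and the reader refuses non-shared entries unless both match. The element types are stripped in this rendering (std::optional user_id, std::vector current_user_roles); below is a self-contained sketch of the access check, using int as a stand-in for the stripped id type:

    #include <optional>
    #include <vector>

    struct CacheKey
    {
        std::optional<int> user_id;            // stand-in for the stripped id type
        std::vector<int> current_user_roles;
        bool is_shared = false;
    };

    bool isAccessible(const CacheKey & entry_key, const CacheKey & key)
    {
        // std::optional::operator== treats "both empty" as equal and "one empty,
        // one set" as unequal, which matches the hand-written check in the hunk above.
        const bool same_user = (entry_key.user_id == key.user_id);
        const bool same_roles = (entry_key.current_user_roles == key.current_user_roles);
        return entry_key.is_shared || (same_user && same_roles);
    }
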
- Key(ASTPtr ast_, const String & user_name_); + Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 4edc9d4d4e5..f8a070a6fde 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -117,13 +117,13 @@ void SelectStreamFactory::createForShard( std::vector & local_plans, Shards & remote_shards, UInt32 shard_count, - bool parallel_replicas_enabled) + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator) { auto it = objects_by_shard.find(shard_info.shard_num); if (it != objects_by_shard.end()) replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, query_ast); - auto emplace_local_stream = [&]() { local_plans.emplace_back(createLocalPlan( @@ -139,6 +139,7 @@ void SelectStreamFactory::createForShard( .shard_info = shard_info, .lazy = lazy, .local_delay = local_delay, + .shard_filter_generator = std::move(shard_filter_generator), }); }; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 511b0dfaadb..9993ea7028d 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -40,6 +40,7 @@ ASTPtr rewriteSelectQuery( ASTPtr table_function_ptr = nullptr); using ColumnsDescriptionByShardNum = std::unordered_map; +using AdditionalShardFilterGenerator = std::function; class SelectStreamFactory { @@ -59,6 +60,7 @@ public: /// (When there is a local replica with big delay). bool lazy = false; time_t local_delay = 0; + AdditionalShardFilterGenerator shard_filter_generator{}; }; using Shards = std::vector; @@ -78,7 +80,8 @@ public: std::vector & local_plans, Shards & remote_shards, UInt32 shard_count, - bool parallel_replicas_enabled); + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator); const Block header; const ColumnsDescriptionByShardNum objects_by_shard; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index c448206ed78..b3a48e3e611 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -158,6 +158,13 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, new_settings.timeout_overflow_mode = settings.timeout_overflow_mode_leaf; } + /// in case of parallel replicas custom key use round robing load balancing + /// so custom key partitions will be spread over nodes in round-robin fashion + if (context->canUseParallelReplicasCustomKey(cluster) && !settings.load_balancing.changed) + { + new_settings.load_balancing = LoadBalancing::ROUND_ROBIN; + } + auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); return new_context; @@ -247,21 +254,6 @@ void executeQuery( visitor.visit(query_ast_for_shard); } - if (shard_filter_generator) - { - auto shard_filter = shard_filter_generator(shard_info.shard_num); - if (shard_filter) - { - auto & select_query = query_ast_for_shard->as(); - - auto where_expression = select_query.where(); - if (where_expression) - shard_filter = makeASTFunction("and", where_expression, shard_filter); - - select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); - } - } - // decide for 
each shard if parallel reading from replicas should be enabled // according to settings and number of replicas declared per shard const auto & addresses = cluster->getShardsAddresses().at(i); @@ -276,7 +268,8 @@ void executeQuery( plans, remote_shards, static_cast(shards), - parallel_replicas_enabled); + parallel_replicas_enabled, + shard_filter_generator); } if (!remote_shards.empty()) diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 1fc49057e07..b5ee4a11df6 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -65,7 +65,7 @@ void executeQuery( const std::string & sharding_key_column_name, const ClusterPtr & not_optimized_cluster, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator = {}); + AdditionalShardFilterGenerator shard_filter_generator); void executeQueryWithParallelReplicas( diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 8e73bc8b484..96be70c5527 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -46,6 +46,9 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr(); inner_hash_join->data = std::make_unique(table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", i)); + /// Non zero `max_joined_block_rows` allows to process block partially and return not processed part. + /// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature. + inner_hash_join->data->setMaxJoinedBlockRows(0); hash_joins.emplace_back(std::move(inner_hash_join)); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 38944b21c49..f1c74f4195b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1583,9 +1583,7 @@ bool Context::hasScalar(const String & name) const void Context::addQueryAccessInfo( const String & quoted_database_name, const String & full_quoted_table_name, - const Names & column_names, - const String & projection_name, - const String & view_name) + const Names & column_names) { if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); @@ -1593,12 +1591,9 @@ void Context::addQueryAccessInfo( std::lock_guard lock(query_access_info.mutex); query_access_info.databases.emplace(quoted_database_name); query_access_info.tables.emplace(full_quoted_table_name); + for (const auto & column_name : column_names) query_access_info.columns.emplace(full_quoted_table_name + "." + backQuoteIfNeed(column_name)); - if (!projection_name.empty()) - query_access_info.projections.emplace(full_quoted_table_name + "." 
+ backQuoteIfNeed(projection_name)); - if (!view_name.empty()) - query_access_info.views.emplace(view_name); } void Context::addQueryAccessInfo(const Names & partition_names) @@ -1611,6 +1606,15 @@ void Context::addQueryAccessInfo(const Names & partition_names) query_access_info.partitions.emplace(partition_name); } +void Context::addViewAccessInfo(const String & view_name) +{ + if (isGlobalContext()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); + + std::lock_guard lock(query_access_info.mutex); + query_access_info.views.emplace(view_name); +} + void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name) { if (!qualified_projection_name) @@ -5083,6 +5087,12 @@ bool Context::canUseParallelReplicasOnFollower() const return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator; } +bool Context::canUseParallelReplicasCustomKey(const Cluster & cluster) const +{ + return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY + && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} + void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) { prepared_sets_cache = cache; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 640aeb0539c..3ef076f45f0 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -693,13 +693,14 @@ public: void addSpecialScalar(const String & name, const Block & block); const QueryAccessInfo & getQueryAccessInfo() const { return query_access_info; } + void addQueryAccessInfo( const String & quoted_database_name, const String & full_quoted_table_name, - const Names & column_names, - const String & projection_name = {}, - const String & view_name = {}); + const Names & column_names); + void addQueryAccessInfo(const Names & partition_names); + void addViewAccessInfo(const String & view_name); struct QualifiedProjectionName { @@ -707,8 +708,8 @@ public: String projection_name; explicit operator bool() const { return !projection_name.empty(); } }; - void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name); + void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name); /// Supported factories for records in query_log enum class QueryLogFactories @@ -1241,6 +1242,7 @@ public: bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; + bool canUseParallelReplicasCustomKey(const Cluster & cluster) const; enum class ParallelReplicasMode : uint8_t { diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 7b8a87b7918..4fb81e4bcf7 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -19,9 +19,9 @@ namespace DB std::weak_ptr CrashLog::crash_log; -NamesAndTypesList CrashLogElement::getNamesAndTypes() +ColumnsDescription CrashLogElement::getColumnsDescription() { - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/CrashLog.h b/src/Interpreters/CrashLog.h index 65714295be4..ef05b434a62 100644 --- a/src/Interpreters/CrashLog.h +++ b/src/Interpreters/CrashLog.h @@ -4,6 +4,7 @@ #include #include #include +#include /// Call this function on crash. 
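The system-log hunks in this section migrate the log elements from getNamesAndTypes to getColumnsDescription; for AsynchronousMetricLog each entry additionally carries a compression codec parsed from text and a per-column comment. A sketch of one such entry, with the data type restored as an assumption (the rendering drops the make_shared template arguments; the codec string and comment are taken verbatim from the hunk):

    ParserCodec codec_parser;
    ColumnsDescription columns
    {
        {
            "event_time",
            std::make_shared<DataTypeDateTime>(),   // assumed type for an event_time column
            parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH),
            "Event time."
        },
    };
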
@@ -27,10 +28,9 @@ struct CrashLogElement Array trace_full; static std::string name() { return "CrashLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class CrashLog : public SystemLog diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 969c57535f9..5c628436d60 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1050,7 +1050,7 @@ static std::unique_ptr buildJoinedPlan( join_element.table_expression, context, original_right_column_names, - query_options.copy().setWithAllColumns().ignoreProjections(false).ignoreAlias(false)); + query_options.copy().setWithAllColumns().ignoreAlias(false)); auto joined_plan = std::make_unique(); interpreter->buildQueryPlan(*joined_plan); { diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index f8012925b8c..ccfee49a66f 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -1,3 +1,4 @@ +#include "Storages/ColumnsDescription.h" #include #include #include @@ -28,14 +29,15 @@ static String typeToString(FilesystemCacheLogElement::CacheType type) UNREACHABLE(); } -NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() +ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() { DataTypes types{ std::make_shared>(), std::make_shared>(), }; - return { + return ColumnsDescription + { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 41a7c8e0fe6..c83ac4e62fd 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { @@ -39,11 +40,10 @@ struct FilesystemCacheLogElement static std::string name() { return "FilesystemCacheLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; - static const char * getCustomColumnList() { return nullptr; } }; class FilesystemCacheLog : public SystemLog diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.cpp b/src/Interpreters/FilesystemReadPrefetchesLog.cpp index 3a26f069b5f..7fb2e3d1f4c 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.cpp +++ b/src/Interpreters/FilesystemReadPrefetchesLog.cpp @@ -11,9 +11,10 @@ namespace DB { -NamesAndTypesList FilesystemReadPrefetchesLogElement::getNamesAndTypes() +ColumnsDescription FilesystemReadPrefetchesLogElement::getColumnsDescription() { - return { + return ColumnsDescription + { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.h b/src/Interpreters/FilesystemReadPrefetchesLog.h index 313c8ab5872..ee6da4966e9 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.h +++ b/src/Interpreters/FilesystemReadPrefetchesLog.h @@ -1,10 +1,11 @@ #pragma once +#include +#include #include #include #include -#include -#include +#include namespace DB { @@ -33,7 +34,7 @@ struct 
FilesystemReadPrefetchesLogElement static std::string name() { return "FilesystemReadPrefetchesLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index a84e1ec2175..b05b10ff25e 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -243,6 +243,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , asof_inequality(table_join->getAsofInequality()) , data(std::make_shared()) , right_sample_block(right_sample_block_) + , max_joined_block_rows(table_join->maxJoinedBlockRows()) , instance_log_id(!instance_id_.empty() ? "(" + instance_id_ + ") " : "") , log(&Poco::Logger::get("HashJoin")) { @@ -1401,7 +1402,7 @@ NO_INLINE size_t joinRightColumns( { if constexpr (join_features.need_replication) { - if (unlikely(current_offset > max_joined_block_rows)) + if (unlikely(current_offset >= max_joined_block_rows)) { added_columns.offsets_to_replicate->resize_assume_reserved(i); added_columns.filter.resize_assume_reserved(i); @@ -1690,7 +1691,7 @@ Block HashJoin::joinBlockImpl( bool has_required_right_keys = (required_right_keys.columns() != 0); added_columns.need_filter = join_features.need_filter || has_required_right_keys; - added_columns.max_joined_block_rows = table_join->maxJoinedBlockRows(); + added_columns.max_joined_block_rows = max_joined_block_rows; if (!added_columns.max_joined_block_rows) added_columns.max_joined_block_rows = std::numeric_limits::max(); else @@ -1771,7 +1772,6 @@ Block HashJoin::joinBlockImpl( void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const { - size_t max_joined_block_rows = table_join->maxJoinedBlockRows(); size_t start_left_row = 0; size_t start_right_block = 0; if (not_processed) diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 284cf5d0e7f..2be58b5fd2d 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -396,6 +396,8 @@ public: void shrinkStoredBlocksToFit(size_t & total_bytes_in_join); + void setMaxJoinedBlockRows(size_t value) { max_joined_block_rows = value; } + private: template friend class NotJoinedHash; @@ -433,6 +435,9 @@ private: /// Left table column names that are sources for required_right_keys columns std::vector required_right_keys_sources; + /// Maximum number of rows in result block. If it is 0, then no limits. + size_t max_joined_block_rows = 0; + /// When tracked memory consumption is more than a threshold, we will shrink to fit stored blocks. 
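The HashJoin changes above cache max_joined_block_rows as a member (0 meaning "no limit") and expose setMaxJoinedBlockRows, which ConcurrentHashJoin uses to set it to 0 and disable partial-block processing. A standalone sketch of the zero-means-unlimited convention, with the numeric_limits template argument restored as an assumption:

    #include <cstddef>
    #include <limits>

    size_t effectiveRowLimit(size_t max_joined_block_rows)
    {
        // 0 disables the limit entirely, which is what
        // ConcurrentHashJoin's setMaxJoinedBlockRows(0) relies on.
        return max_joined_block_rows ? max_joined_block_rows
                                     : std::numeric_limits<size_t>::max();
    }
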
bool shrink_blocks = false; Int64 memory_usage_before_adding_blocks = 0; diff --git a/src/Interpreters/InternalTextLogsQueue.h b/src/Interpreters/InternalTextLogsQueue.h index 34fa3054707..375110e5054 100644 --- a/src/Interpreters/InternalTextLogsQueue.h +++ b/src/Interpreters/InternalTextLogsQueue.h @@ -1,17 +1,9 @@ #pragma once #include #include +#include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index 478735c432b..a4e86879596 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -26,4 +27,13 @@ BlockIO InterpreterAlterNamedCollectionQuery::execute() return {}; } +void registerInterpreterAlterNamedCollectionQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterAlterNamedCollectionQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 2a34932d950..bfcb0d6dd39 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -535,4 +536,13 @@ void InterpreterAlterQuery::extendQueryLogElemImpl(QueryLogElement & elem, const } } +void registerInterpreterAlterQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterAlterQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterBackupQuery.cpp b/src/Interpreters/InterpreterBackupQuery.cpp index be5fcedce27..6f76b21a7b8 100644 --- a/src/Interpreters/InterpreterBackupQuery.cpp +++ b/src/Interpreters/InterpreterBackupQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -47,4 +48,13 @@ BlockIO InterpreterBackupQuery::execute() return res_io; } +void registerInterpreterBackupQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterBackupQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 6f28aee5f13..0cc4afd62f2 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -472,4 +473,13 @@ BlockIO InterpreterCheckQuery::execute() return res; } +void registerInterpreterCheckQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCheckQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index ea59115b077..18e9ba4a64b 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -1,3 +1,4 @@ 
+#include #include #include @@ -53,4 +54,13 @@ BlockIO InterpreterCreateFunctionQuery::execute() return {}; } +void registerInterpreterCreateFunctionQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateFunctionQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index ed29c82a0f0..cd2f996c74c 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -99,4 +100,13 @@ BlockIO InterpreterCreateIndexQuery::execute() return {}; } +void registerInterpreterCreateIndexQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateIndexQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index 3b0fba5fd9f..41e87bb73dd 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -26,4 +27,13 @@ BlockIO InterpreterCreateNamedCollectionQuery::execute() return {}; } +void registerInterpreterCreateNamedCollectionQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateNamedCollectionQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 29abe292908..36e864ace26 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -595,6 +596,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( bool sanity_check_compression_codecs = !attach && !context_->getSettingsRef().allow_suspicious_codecs; bool allow_experimental_codecs = attach || context_->getSettingsRef().allow_experimental_codecs; bool enable_deflate_qpl_codec = attach || context_->getSettingsRef().enable_deflate_qpl_codec; + bool enable_zstd_qat_codec = attach || context_->getSettingsRef().enable_zstd_qat_codec; ColumnsDescription res; auto name_type_it = column_names_and_types.begin(); @@ -655,7 +657,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "ALIAS") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec); + col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); } if (col_decl.stat_type) @@ -1895,4 +1897,13 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr } } +void registerInterpreterCreateQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return 
std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterCreateQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 52723416400..5c13a1145d1 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -110,4 +111,13 @@ BlockIO InterpreterDeleteQuery::execute() } } +void registerInterpreterDeleteQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterDeleteQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index 54b43a8850b..c7e863bf260 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -68,4 +69,13 @@ BlockIO InterpreterDescribeCacheQuery::execute() return res; } +void registerInterpreterDescribeCacheQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterDescribeCacheQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 755de7e4c5f..1aab72afcc1 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -271,4 +272,13 @@ void InterpreterDescribeQuery::addSubcolumns(const ColumnDescription & column, b }, ISerialization::SubstreamData(type->getDefaultSerialization()).withType(type)); } +void registerInterpreterDescribeQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterDescribeQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index c2cd24044da..2661fd9058c 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -49,4 +50,13 @@ BlockIO InterpreterDropFunctionQuery::execute() return {}; } +void registerInterpreterDropFunctionQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterDropFunctionQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterDropIndexQuery.cpp b/src/Interpreters/InterpreterDropIndexQuery.cpp index 98d48942487..025677eeb91 100644 --- a/src/Interpreters/InterpreterDropIndexQuery.cpp +++ b/src/Interpreters/InterpreterDropIndexQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -68,4 +69,13 @@ BlockIO InterpreterDropIndexQuery::execute() return {}; } +void registerInterpreterDropIndexQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + 
factory.registerInterpreter("InterpreterDropIndexQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index fe49b1cfd7c..baadc85f443 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -26,4 +27,13 @@ BlockIO InterpreterDropNamedCollectionQuery::execute() return {}; } +void registerInterpreterDropNamedCollectionQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterDropNamedCollectionQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index b8c9d5dabb5..711100b5de1 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -519,4 +520,12 @@ bool InterpreterDropQuery::supportsTransactions() const && drop.table; } +void registerInterpreterDropQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterDropQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 90fa15bf63f..e4176a44533 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -81,4 +82,12 @@ QueryPipeline InterpreterExistsQuery::executeImpl() "result" }})); } +void registerInterpreterExistsQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterExistsQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 4c70dcb3cc3..458be843b59 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -607,4 +608,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() return QueryPipeline(std::make_shared(sample_block.cloneWithColumns(std::move(res_columns)))); } +void registerInterpreterExplainQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterExplainQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterExternalDDLQuery.cpp b/src/Interpreters/InterpreterExternalDDLQuery.cpp index c0acb1e03eb..e68ff18dbb6 100644 --- a/src/Interpreters/InterpreterExternalDDLQuery.cpp +++ b/src/Interpreters/InterpreterExternalDDLQuery.cpp @@ -1,6 +1,7 @@ #include "config.h" #include +#include #include #include @@ -67,4 +68,13 @@ BlockIO InterpreterExternalDDLQuery::execute() return BlockIO(); } +void registerInterpreterExternalDDLQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + 
factory.registerInterpreter("InterpreterExternalDDLQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index e32cbe4ccad..84432415f5e 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -50,68 +49,14 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include - -#include #include #include #include @@ -131,10 +76,22 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_TYPE_OF_QUERY; + extern const int LOGICAL_ERROR; } +InterpreterFactory & InterpreterFactory::instance() +{ + static InterpreterFactory interpreter_fact; + return interpreter_fact; +} -std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMutablePtr context, const SelectQueryOptions & options) +void InterpreterFactory::registerInterpreter(const std::string & name, CreatorFn creator_fn) +{ + if (!interpreters.emplace(name, std::move(creator_fn)).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "InterpreterFactory: the interpreter name '{}' is not unique", name); +} + +InterpreterFactory::InterpreterPtr InterpreterFactory::get(ASTPtr & query, ContextMutablePtr context, const SelectQueryOptions & options) { ProfileEvents::increment(ProfileEvents::Query); @@ -147,258 +104,247 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut ProfileEvents::increment(ProfileEvents::QueriesWithSubqueries); } + Arguments arguments { + .query = query, + .context = context, + .options = options + }; + + String interpreter_name; + if (query->as()) { if (context->getSettingsRef().allow_experimental_analyzer) - return std::make_unique(query, context, options); - + interpreter_name = "InterpreterSelectQueryAnalyzer"; /// This is internal part of ASTSelectWithUnionQuery. /// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child. 
- return std::make_unique(query, context, options); + else + interpreter_name = "InterpreterSelectQuery"; } else if (query->as()) { ProfileEvents::increment(ProfileEvents::SelectQuery); if (context->getSettingsRef().allow_experimental_analyzer) - return std::make_unique(query, context, options); - - return std::make_unique(query, context, options); + interpreter_name = "InterpreterSelectQueryAnalyzer"; + else + interpreter_name = "InterpreterSelectWithUnionQuery"; } else if (query->as()) { - return std::make_unique(query, context, options); + interpreter_name = "InterpreterSelectIntersectExceptQuery"; } else if (query->as()) { ProfileEvents::increment(ProfileEvents::InsertQuery); bool allow_materialized = static_cast(context->getSettingsRef().insert_allow_materialized_columns); - return std::make_unique(query, context, allow_materialized); + arguments.allow_materialized = allow_materialized; + interpreter_name = "InterpreterInsertQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterDropQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterUndropQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterRenameQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowTablesQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowColumnsQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowIndexesQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowSettingQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowEnginesQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowFunctionsQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterUseQuery"; } else if (query->as()) { /// readonly is checked inside InterpreterSetQuery - return std::make_unique(query, context); + interpreter_name = "InterpreterSetQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterSetRoleQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterOptimizeQuery"; } - else if (query->as()) + else if (query->as() || query->as() || query->as() || query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterExistsQuery"; } - else if (query->as()) + else if (query->as() || query->as() || query->as() || query->as()) { - return std::make_unique(query, context); - } - else if (query->as()) - { - return std::make_unique(query, context); - } - else if (query->as()) - { - return std::make_unique(query, context); - } - else if (query->as()) - { - return std::make_unique(query, context); - } - else if (query->as()) - { - return std::make_unique(query, context); - } - else if (query->as()) - { - return std::make_unique(query, context); - } - else if (query->as()) - { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowCreateQuery"; } else if (query->as()) { - return std::make_unique(query, context); + 
interpreter_name = "InterpreterDescribeQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterDescribeCacheQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterExplainQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowProcesslistQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterAlterQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterAlterNamedCollectionQuery"; } else if (query->as() || query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCheckQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterKillQueryQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterSystemQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterWatchQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateUserQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateRoleQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateQuotaQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateRowPolicyQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateSettingsProfileQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterDropAccessEntityQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterMoveAccessEntityQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterDropNamedCollectionQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterGrantQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowCreateAccessEntityQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowGrantsQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowAccessEntitiesQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name= "InterpreterShowAccessQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterShowPrivilegesQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterExternalDDLQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterTransactionControlQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateFunctionQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterDropFunctionQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterCreateIndexQuery"; } else if (query->as()) { - return std::make_unique(query, 
context); + interpreter_name = "InterpreterCreateNamedCollectionQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterDropIndexQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterBackupQuery"; } else if (query->as()) { - return std::make_unique(query, context); + interpreter_name = "InterpreterDeleteQuery"; } - else - { + + if (!interpreters.contains(interpreter_name)) throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Unknown type of query: {}", query->getID()); - } + + // creator_fn creates and returns a InterpreterPtr with the supplied arguments + auto creator_fn = interpreters.at(interpreter_name); + + return creator_fn(arguments); } } diff --git a/src/Interpreters/InterpreterFactory.h b/src/Interpreters/InterpreterFactory.h index 774cbd1cb0f..3cf3b02d826 100644 --- a/src/Interpreters/InterpreterFactory.h +++ b/src/Interpreters/InterpreterFactory.h @@ -11,13 +11,34 @@ namespace DB class Context; -class InterpreterFactory +class InterpreterFactory : private boost::noncopyable { public: - static std::unique_ptr get( + static InterpreterFactory & instance(); + + struct Arguments + { + ASTPtr & query; + ContextMutablePtr context; + const SelectQueryOptions & options; + bool allow_materialized; + }; + + using InterpreterPtr = std::unique_ptr; + + InterpreterPtr get( ASTPtr & query, ContextMutablePtr context, const SelectQueryOptions & options = {}); + + using CreatorFn = std::function; + + using Interpreters = std::unordered_map; + + void registerInterpreter(const std::string & name, CreatorFn creator_fn); + +private: + Interpreters interpreters; }; } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 283289f0dfc..c8e05fcd5e3 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -680,4 +681,12 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, cons extendQueryLogElemImpl(elem, context_); } +void registerInterpreterInsertQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.allow_materialized); + }; + factory.registerInterpreter("InterpreterInsertQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 6e1422f2938..5efffdaa194 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -452,4 +453,13 @@ AccessRightsElements InterpreterKillQueryQuery::getRequiredAccessForDDLOnCluster return required_access; } +void registerInterpreterKillQueryQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterKillQueryQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp index ae456e8b31d..1e2eaa50ab1 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -93,4 +94,12 @@ AccessRightsElements InterpreterOptimizeQuery::getRequiredAccess() const 
return required_access; } +void registerInterpreterOptimizeQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterOptimizeQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index ae79b3f932e..52001fdcaf4 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -228,4 +229,13 @@ void InterpreterRenameQuery::extendQueryLogElemImpl(QueryLogElement & elem, cons } } +void registerInterpreterRenameQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterRenameQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 2218ed4417b..6eac2db20c9 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -210,4 +211,13 @@ void InterpreterSelectIntersectExceptQuery::extendQueryLogElemImpl(QueryLogEleme } } +void registerInterpreterSelectIntersectExceptQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.options); + }; + factory.registerInterpreter("InterpreterSelectIntersectExceptQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8e8482ccbd7..f7cb6d4d849 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -389,8 +390,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!prepared_sets) prepared_sets = std::make_shared(); - query_info.ignore_projections = options.ignore_projections; - query_info.is_projection_query = options.is_projection_query; query_info.is_internal = options.is_internal; initSettings(); @@ -416,7 +415,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( } query_info.query = query_ptr->clone(); - query_info.original_query = query_ptr->clone(); if (settings.count_distinct_optimization) { @@ -591,9 +589,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( } } else if (auto * distributed = dynamic_cast(storage.get()); - distributed && canUseCustomKey(settings, *distributed->getCluster(), *context)) + distributed && context->canUseParallelReplicasCustomKey(*distributed->getCluster())) { - query_info.use_custom_key = true; context->setSetting("distributed_group_by_no_merge", 2); } } @@ -855,9 +852,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( analysis_result.required_columns = required_columns; } - if (query_info.projection) - storage_snapshot->addProjection(query_info.projection->desc); - /// Blocks used in expression analysis contains size 1 const columns for constant folding and /// null non-const columns to avoid useless memory allocations. However, a valid block sample /// requires all columns to be of size 0, thus we need to sanitize the block here. 
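The hunks above replace InterpreterFactory's long if/else chain of make_unique calls with a name-keyed registry that each interpreter's translation unit fills in through registerInterpreter(). Below is a minimal, self-contained sketch of that pattern; Query, IInterpreter, Arguments and the creator signature are simplified stand-ins for illustration, not the real ClickHouse types.

#include <functional>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>

struct Query { std::string kind; };
struct IInterpreter { virtual ~IInterpreter() = default; };

class Factory
{
public:
    struct Arguments { const Query & query; };

    using InterpreterPtr = std::unique_ptr<IInterpreter>;
    using CreatorFn = std::function<InterpreterPtr(const Arguments &)>;

    static Factory & instance()
    {
        static Factory factory;
        return factory;
    }

    void registerInterpreter(const std::string & name, CreatorFn creator_fn)
    {
        creators[name] = std::move(creator_fn);
    }

    InterpreterPtr get(const Query & query) const
    {
        /// Dispatch by name, the way get() now resolves interpreter_name
        /// instead of constructing each interpreter inline.
        auto it = creators.find(query.kind);
        if (it == creators.end())
            throw std::runtime_error("Unknown type of query: " + query.kind);
        return it->second(Arguments{query});
    }

private:
    std::unordered_map<std::string, CreatorFn> creators;
};

struct SelectInterpreter : IInterpreter
{
    explicit SelectInterpreter(const Factory::Arguments &) {}
};

int main()
{
    /// Rough analogue of a registerInterpreterXxxQuery() function: register a creator lambda by name.
    Factory::instance().registerInterpreter("InterpreterSelectQuery",
        [](const Factory::Arguments & args) -> Factory::InterpreterPtr
        { return std::make_unique<SelectInterpreter>(args); });

    auto interpreter = Factory::instance().get(Query{"InterpreterSelectQuery"});
    return interpreter ? 0 : 1;
}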
@@ -964,10 +958,7 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan) executeImpl(query_plan, std::move(input_pipe)); /// We must guarantee that result structure is the same as in getSampleBlock() - /// - /// But if it's a projection query, plan header does not match result_header. - /// TODO: add special stage for InterpreterSelectQuery? - if (!options.is_projection_query && !blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header)) + if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header)) { auto convert_actions_dag = ActionsDAG::makeConvertingActions( query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), @@ -1475,12 +1466,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

desc->type == ProjectionDescription::Type::Aggregate) - { - query_info.projection->aggregate_overflow_row = aggregate_overflow_row; - query_info.projection->aggregate_final = aggregate_final; - } - if (options.only_analyze) { auto read_nothing = std::make_unique(source_header); @@ -1549,11 +1534,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

{}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); } - if (query_info.projection && query_info.projection->input_order_info && query_info.input_order_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "InputOrderInfo is set for projection and for query"); InputOrderInfoPtr input_order_info_for_order; if (!expressions.need_aggregate) - input_order_info_for_order = query_info.projection ? query_info.projection->input_order_info : query_info.input_order_info; + input_order_info_for_order = query_info.input_order_info; if (options.to_stage > QueryProcessingStage::FetchColumns) { @@ -1614,7 +1597,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), @@ -1788,7 +1771,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(source_header)); - PrewhereInfoPtr prewhere_info_ptr = query_info.projection ? query_info.projection->prewhere_info : query_info.prewhere_info; - if (prewhere_info_ptr) + if (query_info.prewhere_info) { - auto & prewhere_info = *prewhere_info_ptr; + auto & prewhere_info = *query_info.prewhere_info; if (prewhere_info.row_level_filter) { @@ -2087,50 +2068,6 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan( auto read_from_pipe = std::make_unique(std::move(pipe)); read_from_pipe->setStepDescription("Read from NullSource"); query_plan.addStep(std::move(read_from_pipe)); - - if (query_info.projection) - { - if (query_info.projection->before_where) - { - auto where_step = std::make_unique( - query_plan.getCurrentDataStream(), - query_info.projection->before_where, - query_info.projection->where_column_name, - query_info.projection->remove_where_filter); - - where_step->setStepDescription("WHERE"); - query_plan.addStep(std::move(where_step)); - } - - if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) - { - if (query_info.projection->before_aggregation) - { - auto expression_before_aggregation - = std::make_unique(query_plan.getCurrentDataStream(), query_info.projection->before_aggregation); - expression_before_aggregation->setStepDescription("Before GROUP BY"); - query_plan.addStep(std::move(expression_before_aggregation)); - } - - // Let's just choose the safe option since we don't know the value of `to_stage` here. - const bool should_produce_results_in_order_of_bucket_number = true; - - // It is used to determine if we should use memory bound merging strategy. Maybe it makes sense for projections, but so far this case is just left untouched. - SortDescription group_by_sort_description; - - executeMergeAggregatedImpl( - query_plan, - query_info.projection->aggregate_overflow_row, - query_info.projection->aggregate_final, - false, - false, - context_->getSettingsRef(), - query_info.projection->aggregation_keys, - query_info.projection->aggregate_descriptions, - should_produce_results_in_order_of_bucket_number, - std::move(group_by_sort_description)); - } - } } RowPolicyFilterPtr InterpreterSelectQuery::getRowPolicyFilter() const @@ -2574,80 +2511,47 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Create optimizer with prepared actions. /// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge. - if ((optimize_read_in_order || optimize_aggregation_in_order) - && (!query_info.projection || query_info.projection->complete)) + if (optimize_read_in_order) { - if (optimize_read_in_order) - { - if (query_info.projection) - { - query_info.projection->order_optimizer = std::make_shared( - // TODO Do we need a projection variant for this field? 
- query, - analysis_result.order_by_elements_actions, - getSortDescription(query, context), - query_info.syntax_analyzer_result); - } - else - { - query_info.order_optimizer = std::make_shared( - query, - analysis_result.order_by_elements_actions, - getSortDescription(query, context), - query_info.syntax_analyzer_result); - } - } - else if (optimize_aggregation_in_order) - { - if (query_info.projection) - { - query_info.projection->order_optimizer = std::make_shared( - query, - query_info.projection->group_by_elements_actions, - query_info.projection->group_by_elements_order_descr, - query_info.syntax_analyzer_result); - } - else - { - query_info.order_optimizer = std::make_shared( - query, - analysis_result.group_by_elements_actions, - getSortDescriptionFromGroupBy(query), - query_info.syntax_analyzer_result); - } - } + query_info.order_optimizer = std::make_shared( + query, + analysis_result.order_by_elements_actions, + getSortDescription(query, context), + query_info.syntax_analyzer_result); /// If we don't have filtration, we can pushdown limit to reading stage for optimizations. - UInt64 limit = (query.hasFiltration() || query.groupBy()) ? 0 : getLimitForSorting(query, context); - if (query_info.projection) - query_info.projection->input_order_info - = query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context, limit); - else - query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit); + UInt64 limit = query.hasFiltration() ? 0 : getLimitForSorting(query, context); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit); + } + else if (optimize_aggregation_in_order) + { + query_info.order_optimizer = std::make_shared( + query, + analysis_result.group_by_elements_actions, + getSortDescriptionFromGroupBy(query), + query_info.syntax_analyzer_result); + + query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, /*limit=*/ 0); } query_info.storage_limits = std::make_shared(storage_limits); - query_info.settings_limit_offset_done = options.settings_limit_offset_done; storage->read(query_plan, required_columns, storage_snapshot, query_info, context, processing_stage, max_block_size, max_streams); if (context->hasQueryContext() && !options.is_internal) { - const String view_name{}; auto local_storage_id = storage->getStorageID(); context->getQueryContext()->addQueryAccessInfo( backQuoteIfNeed(local_storage_id.getDatabaseName()), local_storage_id.getFullTableName(), - required_columns, - query_info.projection ? query_info.projection->desc->name : "", - view_name); + required_columns); } /// Create step which reads from empty source if storage has no data. 
if (!query_plan.isInitialized()) { auto header = storage_snapshot->getSampleBlockForColumns(required_columns); - addEmptySourceToQueryPlan(query_plan, header, query_info, context); + addEmptySourceToQueryPlan(query_plan, header, query_info); } } else @@ -2756,13 +2660,8 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac expression_before_aggregation->setStepDescription("Before GROUP BY"); query_plan.addStep(std::move(expression_before_aggregation)); - if (options.is_projection_query) - return; - AggregateDescriptions aggregates = query_analyzer->aggregates(); - const Settings & settings = context->getSettingsRef(); - const auto & keys = query_analyzer->aggregationKeys().getNames(); auto aggregator_params = getAggregatorParams( @@ -2826,13 +2725,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets) { - /// If aggregate projection was chosen for table, avoid adding MergeAggregated. - /// It is already added by storage (because of performance issues). - /// TODO: We should probably add another one processing stage for storage? - /// WithMergeableStateAfterAggregation is not ok because, e.g., it skips sorting after aggregation. - if (query_info.projection && query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) - return; - const Settings & settings = context->getSettingsRef(); /// Used to determine if we should use memory bound merging strategy. @@ -2985,7 +2877,15 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) // has suitable sorting. Also don't create sort steps when there are no // columns to sort by, because the sort nodes are confused by this. It // happens in case of `over ()`. - if (!window.full_sort_description.empty() && (i == 0 || !sortIsPrefix(window, *windows_sorted[i - 1]))) + // Even if full_sort_description of both windows match, in case of different + // partitioning we need to add a SortingStep to reshuffle data in the streams. 
+ bool need_sort = !window.full_sort_description.empty(); + if (need_sort && i != 0) + { + need_sort = !sortIsPrefix(window, *windows_sorted[i - 1]) + || (settings.max_threads != 1 && window.partition_by.size() != windows_sorted[i - 1]->partition_by.size()); + } + if (need_sort) { SortingStep::Settings sort_settings(*context); @@ -3333,5 +3233,13 @@ bool InterpreterSelectQuery::isQueryWithFinal(const SelectQueryInfo & info) return result; } +void registerInterpreterSelectQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.options); + }; + factory.registerInterpreter("InterpreterSelectQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index ec9612ad248..fbb53d71755 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -117,7 +117,7 @@ public: bool hasAggregation() const { return query_analyzer->hasAggregation(); } static void addEmptySourceToQueryPlan( - QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info, const ContextPtr & context_); + QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info); Names getRequiredColumns() { return required_columns; } diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index eed9d03ab5a..f498fc7ec85 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -267,4 +268,13 @@ void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & el elem.used_row_policies.emplace(used_row_policy); } +void registerInterpreterSelectQueryAnalyzer(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.options); + }; + factory.registerInterpreter("InterpreterSelectQueryAnalyzer", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 2ae74955e4f..16bc4b1fe2e 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -410,4 +411,13 @@ void InterpreterSelectWithUnionQuery::extendQueryLogElemImpl(QueryLogElement & e } } +void registerInterpreterSelectWithUnionQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context, args.options); + }; + factory.registerInterpreter("InterpreterSelectWithUnionQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 2c0baa0d4b3..261c781e0ba 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -91,4 +92,12 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta } } +void registerInterpreterSetQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + 
factory.registerInterpreter("InterpreterSetQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index a5b22387448..149ba6d7575 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -164,5 +165,13 @@ BlockIO InterpreterShowColumnsQuery::execute() return executeQuery(getRewrittenQuery(), getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowColumnsQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowColumnsQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 0d60f13af66..9edac1fd8e1 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -104,4 +105,13 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() "statement"}})); } +void registerInterpreterShowCreateQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + + factory.registerInterpreter("InterpreterShowCreateQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowEngineQuery.cpp b/src/Interpreters/InterpreterShowEngineQuery.cpp index 2927fbd0f2d..f2d057a3fcf 100644 --- a/src/Interpreters/InterpreterShowEngineQuery.cpp +++ b/src/Interpreters/InterpreterShowEngineQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -15,4 +16,13 @@ BlockIO InterpreterShowEnginesQuery::execute() return executeQuery("SELECT * FROM system.table_engines ORDER BY name", getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowEnginesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowEnginesQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterShowFunctionsQuery.cpp b/src/Interpreters/InterpreterShowFunctionsQuery.cpp index a9da01b0988..e83f61eac53 100644 --- a/src/Interpreters/InterpreterShowFunctionsQuery.cpp +++ b/src/Interpreters/InterpreterShowFunctionsQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -43,4 +44,13 @@ FROM {}.{})", return rewritten_query; } +void registerInterpreterShowFunctionsQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowFunctionsQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 09b70e951db..e8005ead91e 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -104,5 +105,13 @@ BlockIO InterpreterShowIndexesQuery::execute() return executeQuery(getRewrittenQuery(), getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowIndexesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] 
(const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowIndexesQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.cpp b/src/Interpreters/InterpreterShowProcesslistQuery.cpp index f711cc0dac9..7bdb94482da 100644 --- a/src/Interpreters/InterpreterShowProcesslistQuery.cpp +++ b/src/Interpreters/InterpreterShowProcesslistQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -15,4 +16,13 @@ BlockIO InterpreterShowProcesslistQuery::execute() return executeQuery("SELECT * FROM system.processes ORDER BY elapsed DESC", getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowProcesslistQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowProcesslistQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterShowSettingQuery.cpp b/src/Interpreters/InterpreterShowSettingQuery.cpp index 45e9b8a1f1c..90acaa7b083 100644 --- a/src/Interpreters/InterpreterShowSettingQuery.cpp +++ b/src/Interpreters/InterpreterShowSettingQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -29,5 +30,13 @@ BlockIO InterpreterShowSettingQuery::execute() return executeQuery(getRewrittenQuery(), getContext(), QueryFlags{ .internal = true }).second; } +void registerInterpreterShowSettingQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowSettingQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 0ca6578128d..51038aaca46 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -221,4 +222,14 @@ BlockIO InterpreterShowTablesQuery::execute() /// sort the output of SHOW otherwise (SELECT * FROM (SHOW ...) ORDER BY ...) is rejected) and 3. some /// SQL tests can take advantage of this. 
+ +void registerInterpreterShowTablesQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterShowTablesQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index db02ee13a4f..6aa707b8082 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -73,6 +74,10 @@ #include #endif +#if USE_JEMALLOC +#include +#endif + #include "config.h" namespace CurrentMetrics @@ -97,7 +102,6 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } - namespace ActionLocks { extern const StorageActionBlockType PartsMerge; @@ -727,6 +731,33 @@ BlockIO InterpreterSystemQuery::execute() resetCoverage(); break; } + +#if USE_JEMALLOC + case Type::JEMALLOC_PURGE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + purgeJemallocArenas(); + break; + } + case Type::JEMALLOC_ENABLE_PROFILE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + setJemallocProfileActive(true); + break; + } + case Type::JEMALLOC_DISABLE_PROFILE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + setJemallocProfileActive(false); + break; + } + case Type::JEMALLOC_FLUSH_PROFILE: + { + getContext()->checkAccess(AccessType::SYSTEM_JEMALLOC); + flushJemallocProfile("/tmp/jemalloc_clickhouse"); + break; + } +#endif default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type of SYSTEM query"); } @@ -1367,6 +1398,16 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_LISTEN); break; } +#if USE_JEMALLOC + case Type::JEMALLOC_PURGE: + case Type::JEMALLOC_ENABLE_PROFILE: + case Type::JEMALLOC_DISABLE_PROFILE: + case Type::JEMALLOC_FLUSH_PROFILE: + { + required_access.emplace_back(AccessType::SYSTEM_JEMALLOC); + break; + } +#endif case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: @@ -1378,4 +1419,13 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() return required_access; } +void registerInterpreterSystemQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterSystemQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index a0a82121ba8..b1758013f18 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -135,4 +136,13 @@ BlockIO InterpreterTransactionControlQuery::executeSetSnapshot(ContextMutablePtr return {}; } +void registerInterpreterTransactionControlQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterTransactionControlQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index bdd72b6d3ea..8401c47df6b 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ 
-1,5 +1,6 @@ #include #include +#include #include #include #include @@ -72,4 +73,13 @@ AccessRightsElements InterpreterUndropQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::UNDROP_TABLE, undrop.getDatabase(), undrop.getTable()); return required_access; } + +void registerInterpreterUndropQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterUndropQuery", create_fn); +} } diff --git a/src/Interpreters/InterpreterUseQuery.cpp b/src/Interpreters/InterpreterUseQuery.cpp index b71f3a9cc1c..58be12927b9 100644 --- a/src/Interpreters/InterpreterUseQuery.cpp +++ b/src/Interpreters/InterpreterUseQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -16,4 +17,13 @@ BlockIO InterpreterUseQuery::execute() return {}; } +void registerInterpreterUseQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterUseQuery", create_fn); +} + } diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index 8865c47a785..2b68c5d7a10 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -12,6 +12,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include @@ -103,4 +104,13 @@ QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() return pipeline; } +void registerInterpreterWatchQuery(InterpreterFactory & factory) +{ + auto create_fn = [] (const InterpreterFactory::Arguments & args) + { + return std::make_unique(args.query, args.context); + }; + factory.registerInterpreter("InterpreterWatchQuery", create_fn); +} + } diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 6bd202a1dd7..0aee96ee9c4 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -747,15 +747,8 @@ void NotJoinedBlocks::extractColumnChanges(size_t right_pos, size_t result_pos) void NotJoinedBlocks::correctLowcardAndNullability(Block & block) { - for (auto & [pos, added] : right_nullability_changes) - { - auto & col = block.getByPosition(pos); - if (added) - JoinCommon::convertColumnToNullable(col); - else - JoinCommon::removeColumnNullability(col); - } - + /// First correct LowCardinality, then Nullability, + /// because LowCardinality(Nullable(T)) is possible, but not Nullable(LowCardinality(T)) for (auto & [pos, added] : right_lowcard_changes) { auto & col = block.getByPosition(pos); @@ -771,6 +764,15 @@ void NotJoinedBlocks::correctLowcardAndNullability(Block & block) col.type = recursiveRemoveLowCardinality(col.type); } } + + for (auto & [pos, added] : right_nullability_changes) + { + auto & col = block.getByPosition(pos); + if (added) + JoinCommon::convertColumnToNullable(col); + else + JoinCommon::removeColumnNullability(col); + } } void NotJoinedBlocks::addLeftColumns(Block & block, size_t rows_added) const diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 7993bda4bd9..5f6db0da520 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -12,32 +12,30 @@ namespace DB { -NamesAndTypesList MetricLogElement::getNamesAndTypes() +ColumnsDescription MetricLogElement::getColumnsDescription() { - NamesAndTypesList 
columns_with_type_and_name; + ColumnsDescription result; - columns_with_type_and_name.emplace_back("hostname", std::make_shared(std::make_shared())); - columns_with_type_and_name.emplace_back("event_date", std::make_shared()); - columns_with_type_and_name.emplace_back("event_time", std::make_shared()); - columns_with_type_and_name.emplace_back("event_time_microseconds", std::make_shared(6)); + result.add({"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}); + result.add({"event_date", std::make_shared(), "Event date."}); + result.add({"event_time", std::make_shared(), "Event time."}); + result.add({"event_time_microseconds", std::make_shared(6), "Event time with microseconds resolution."}); for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) { - std::string name; - name += "ProfileEvent_"; - name += ProfileEvents::getName(ProfileEvents::Event(i)); - columns_with_type_and_name.emplace_back(std::move(name), std::make_shared()); + auto name = fmt::format("ProfileEvent_{}", ProfileEvents::getName(ProfileEvents::Event(i))); + const auto * comment = ProfileEvents::getDocumentation(ProfileEvents::Event(i)); + result.add({std::move(name), std::make_shared(), comment}); } for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) { - std::string name; - name += "CurrentMetric_"; - name += CurrentMetrics::getName(CurrentMetrics::Metric(i)); - columns_with_type_and_name.emplace_back(std::move(name), std::make_shared()); + auto name = fmt::format("CurrentMetric_{}", CurrentMetrics::getName(CurrentMetrics::Metric(i))); + const auto * comment = CurrentMetrics::getDocumentation(CurrentMetrics::Metric(i)); + result.add({std::move(name), std::make_shared(), comment}); } - return columns_with_type_and_name; + return result; } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index a57f1cebf71..482681d8276 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,7 @@ struct MetricLogElement std::vector current_metrics; static std::string name() { return "MetricLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index a6ea03f8a03..b478382b10d 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -190,7 +190,7 @@ bool isStorageTouchedByMutations( if (context->getSettingsRef().allow_experimental_analyzer) { auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage.shared_from_this(), context); - InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits().ignoreProjections()); + InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits()); io = interpreter.execute(); } else @@ -200,7 +200,7 @@ bool isStorageTouchedByMutations( /// For some reason it may copy context and give it into ExpressionTransform /// after that we will use context from destroyed stack frame in our stream. 
interpreter_select_query.emplace( - select_query, context, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); + select_query, context, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits()); io = interpreter_select_query->execute(); } @@ -404,7 +404,7 @@ MutationsInterpreter::MutationsInterpreter( , available_columns(std::move(available_columns_)) , context(Context::createCopy(context_)) , settings(std::move(settings_)) - , select_limits(SelectQueryOptions().analyze(!settings.can_execute).ignoreLimits().ignoreProjections()) + , select_limits(SelectQueryOptions().analyze(!settings.can_execute).ignoreLimits()) { prepare(!settings.can_execute); } diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 40aaa63dd6e..fffc1e50da0 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -15,7 +15,7 @@ namespace DB { -NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes() +ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() { auto span_kind_type = std::make_shared( DataTypeEnum8::Values @@ -30,7 +30,8 @@ NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes() auto low_cardinality_string = std::make_shared(std::make_shared()); - return { + return ColumnsDescription + { {"hostname", low_cardinality_string}, {"trace_id", std::make_shared()}, {"span_id", std::make_shared()}, diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h index 7368b184e5e..4907a8feb5a 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.h +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -15,7 +16,8 @@ struct OpenTelemetrySpanLogElement : public OpenTelemetry::Span : OpenTelemetry::Span(span) {} static std::string name() { return "OpenTelemetrySpanLog"; } - static NamesAndTypesList getNamesAndTypes(); + + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 338775bfb0c..9819b8e3ec4 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -57,7 +57,7 @@ PartLogElement::PartMergeAlgorithm PartLogElement::getMergeAlgorithm(MergeAlgori throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeAlgorithm {}", static_cast(merge_algorithm_)); } -NamesAndTypesList PartLogElement::getNamesAndTypes() +ColumnsDescription PartLogElement::getColumnsDescription() { auto event_type_datatype = std::make_shared( DataTypeEnum8::Values @@ -92,44 +92,57 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() ColumnsWithTypeAndName columns_with_type_and_name; - return { - {"hostname", std::make_shared(std::make_shared())}, - {"query_id", std::make_shared()}, - {"event_type", std::move(event_type_datatype)}, - {"merge_reason", std::move(merge_reason_datatype)}, - {"merge_algorithm", std::move(merge_algorithm_datatype)}, - {"event_date", std::make_shared()}, + return ColumnsDescription + { + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"query_id", std::make_shared(), "Identifier of the INSERT query that created this data part."}, + {"event_type", std::move(event_type_datatype), + "Type of the event that occurred with the data part. 
" + "Can have one of the following values: " + "NewPart — Inserting of a new data part, " + "MergeParts — Merging of data parts, " + "DownloadParts — Downloading a data part, " + "RemovePart — Removing or detaching a data part using DETACH PARTITION, " + "MutatePart — Mutating of a data part, " + "MovePart — Moving the data part from the one disk to another one."}, + {"merge_reason", std::move(merge_reason_datatype), + "The reason for the event with type MERGE_PARTS. Can have one of the following values: " + "NotAMerge — The current event has the type other than MERGE_PARTS, " + "RegularMerge — Some regular merge, " + "TTLDeleteMerge — Cleaning up expired data. " + "TTLRecompressMerge — Recompressing data part with the. "}, + {"merge_algorithm", std::move(merge_algorithm_datatype), "Merge algorithm for the event with type MERGE_PARTS. Can have one of the following values: Undecided, Horizontal, Vertical"}, + {"event_date", std::make_shared(), "Event date."}, + {"event_time", std::make_shared(), "Event time."}, + {"event_time_microseconds", std::make_shared(6), "Event time with microseconds precision."}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"duration_ms", std::make_shared(), "Duration of this operation."}, - {"duration_ms", std::make_shared()}, - - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"table_uuid", std::make_shared()}, - {"part_name", std::make_shared()}, - {"partition_id", std::make_shared()}, + {"database", std::make_shared(), "Name of the database the data part is in."}, + {"table", std::make_shared(), "Name of the table the data part is in."}, + {"table_uuid", std::make_shared(), "UUID of the table the data part belongs to."}, + {"part_name", std::make_shared(), "Name of the data part."}, + {"partition_id", std::make_shared(), "ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`."}, {"partition", std::make_shared()}, - {"part_type", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path_on_disk", std::make_shared()}, + {"part_type", std::make_shared(), "The type of the part. 
Possible values: Wide and Compact."}, + {"disk_name", std::make_shared(), "Name of the disk the data part lies on."}, + {"path_on_disk", std::make_shared(), "Absolute path to the folder with data part files."}, - {"rows", std::make_shared()}, - {"size_in_bytes", std::make_shared()}, // On disk + {"rows", std::make_shared(), "The number of rows in the data part."}, + {"size_in_bytes", std::make_shared(), "Size of the data part on disk in bytes."}, /// Merge-specific info - {"merged_from", std::make_shared(std::make_shared())}, - {"bytes_uncompressed", std::make_shared()}, // Result bytes - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, + {"merged_from", std::make_shared(std::make_shared()), "An array of the names of the source parts that the current part was made up from."}, + {"bytes_uncompressed", std::make_shared(), "Uncompressed size of the resulting part in bytes."}, + {"read_rows", std::make_shared(), "The number of rows read during the merge."}, + {"read_bytes", std::make_shared(), "The number of bytes read during the merge."}, + {"peak_memory_usage", std::make_shared(), "The maximum amount of RAM used during the merge."}, /// Is there an error during the execution or commit - {"error", std::make_shared()}, - {"exception", std::make_shared()}, + {"error", std::make_shared(), "The error code of the exception that occurred."}, + {"exception", std::make_shared(), "Text message of the error that occurred."}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "All the profile events captured during this operation."}, }; } diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 462314f2768..d4cd571d69b 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -93,7 +93,7 @@ struct PartLogElement static MergeReasonType getMergeReasonType(MergeType merge_type); static PartMergeAlgorithm getMergeAlgorithm(MergeAlgorithm merge_algorithm_); - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 68b5d63e613..088d193257c 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -17,9 +17,9 @@ namespace DB { -NamesAndTypesList ProcessorProfileLogElement::getNamesAndTypes() +ColumnsDescription ProcessorProfileLogElement::getColumnsDescription() { - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 63791c0374c..49d2c21af89 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -36,7 +37,7 @@ struct ProcessorProfileLogElement size_t output_bytes{}; static std::string name() { return "ProcessorsProfileLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; 
} diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 1e259bb510e..ad6e344655b 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -31,7 +31,7 @@ namespace DB { -NamesAndTypesList QueryLogElement::getNamesAndTypes() +ColumnsDescription QueryLogElement::getColumnsDescription() { auto query_status_datatype = std::make_shared( DataTypeEnum8::Values @@ -54,91 +54,91 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() auto low_cardinality_string = std::make_shared(std::make_shared()); auto array_low_cardinality_string = std::make_shared(low_cardinality_string); - return + return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"type", std::move(query_status_datatype)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"query_start_time", std::make_shared()}, - {"query_start_time_microseconds", std::make_shared(6)}, - {"query_duration_ms", std::make_shared()}, + {"hostname", low_cardinality_string, "Hostname of the server executing the query."}, + {"type", std::move(query_status_datatype), "Type of an event that occurred when executing the query."}, + {"event_date", std::make_shared(), "Query starting date."}, + {"event_time", std::make_shared(), "Query starting time."}, + {"event_time_microseconds", std::make_shared(6), "Query starting time with microseconds precision."}, + {"query_start_time", std::make_shared(), "Start time of query execution."}, + {"query_start_time_microseconds", std::make_shared(6), "Start time of query execution with microsecond precision."}, + {"query_duration_ms", std::make_shared(), "Duration of query execution in milliseconds."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"result_rows", std::make_shared()}, - {"result_bytes", std::make_shared()}, - {"memory_usage", std::make_shared()}, + {"read_rows", std::make_shared(), "Total number of rows read from all tables and table functions that participated in the query. It includes usual subqueries, subqueries for IN and JOIN. For distributed queries read_rows includes the total number of rows read at all replicas. Each replica sends its read_rows value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value."}, + {"read_bytes", std::make_shared(), "Total number of bytes read from all tables and table functions that participated in the query. It includes usual subqueries, subqueries for IN and JOIN. For distributed queries read_bytes includes the total number of bytes read at all replicas. Each replica sends its read_bytes value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value."}, + {"written_rows", std::make_shared(), "For INSERT queries, the number of written rows. For other queries, the column value is 0."}, + {"written_bytes", std::make_shared(), "For INSERT queries, the number of written bytes (uncompressed). 
For other queries, the column value is 0."}, + {"result_rows", std::make_shared(), "Number of rows in the result of a SELECT query, or the number of rows in an INSERT query."}, + {"result_bytes", std::make_shared(), "RAM volume in bytes used to store a query result."}, + {"memory_usage", std::make_shared(), "Memory consumption by the query."}, - {"current_database", low_cardinality_string}, - {"query", std::make_shared()}, - {"formatted_query", std::make_shared()}, - {"normalized_query_hash", std::make_shared()}, - {"query_kind", low_cardinality_string}, - {"databases", array_low_cardinality_string}, - {"tables", array_low_cardinality_string}, - {"columns", array_low_cardinality_string}, - {"partitions", array_low_cardinality_string}, - {"projections", array_low_cardinality_string}, - {"views", array_low_cardinality_string}, - {"exception_code", std::make_shared()}, - {"exception", std::make_shared()}, - {"stack_trace", std::make_shared()}, + {"current_database", low_cardinality_string, "Name of the current database."}, + {"query", std::make_shared(), "Query string."}, + {"formatted_query", std::make_shared(), "Formatted query string."}, + {"normalized_query_hash", std::make_shared(), "A hash value that is identical for queries differing only in the values of literals."}, + {"query_kind", low_cardinality_string, "Type of the query."}, + {"databases", array_low_cardinality_string, "Names of the databases present in the query."}, + {"tables", array_low_cardinality_string, "Names of the tables present in the query."}, + {"columns", array_low_cardinality_string, "Names of the columns present in the query."}, + {"partitions", array_low_cardinality_string, "Names of the partitions present in the query."}, + {"projections", array_low_cardinality_string, "Names of the projections used during the query execution."}, + {"views", array_low_cardinality_string, "Names of the (materialized or live) views present in the query."}, + {"exception_code", std::make_shared(), "Code of an exception."}, + {"exception", std::make_shared(), "Exception message."}, + {"stack_trace", std::make_shared(), "Stack trace. An empty string if the query was completed successfully."}, - {"is_initial_query", std::make_shared()}, - {"user", low_cardinality_string}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared()}, - {"initial_user", low_cardinality_string}, - {"initial_query_id", std::make_shared()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared()}, - {"initial_query_start_time", std::make_shared()}, - {"initial_query_start_time_microseconds", std::make_shared(6)}, - {"interface", std::make_shared()}, - {"is_secure", std::make_shared()}, - {"os_user", low_cardinality_string}, - {"client_hostname", low_cardinality_string}, - {"client_name", low_cardinality_string}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, - {"http_method", std::make_shared()}, - {"http_user_agent", low_cardinality_string}, - {"http_referer", std::make_shared()}, - {"forwarded_for", std::make_shared()}, - {"quota_key", std::make_shared()}, - {"distributed_depth", std::make_shared()}, + {"is_initial_query", std::make_shared(), "Query type. 
Possible values: 1 — query was initiated by the client, 0 — query was initiated by another query as part of distributed query execution."}, + {"user", low_cardinality_string, "Name of the user who initiated the current query."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address that was used to make the query."}, + {"port", std::make_shared(), "The client port that was used to make the query."}, + {"initial_user", low_cardinality_string, "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared(), "The client port that was used to make the parent query."}, + {"initial_query_start_time", std::make_shared(), "Initial query starting time (for distributed query execution)."}, + {"initial_query_start_time_microseconds", std::make_shared(6), "Initial query starting time with microseconds precision (for distributed query execution)."}, + {"interface", std::make_shared(), "Interface that the query was initiated from. Possible values: 1 — TCP, 2 — HTTP."}, + {"is_secure", std::make_shared(), "Flag indicating whether the query was executed over a secure interface."}, + {"os_user", low_cardinality_string, "Operating system username of the user who runs clickhouse-client."}, + {"client_hostname", low_cardinality_string, "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", low_cardinality_string, "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface, 1 — GET method was used, 2 — POST method was used."}, + {"http_user_agent", low_cardinality_string, "HTTP header UserAgent passed in the HTTP query."}, + {"http_referer", std::make_shared(), "HTTP header Referer passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header X-Forwarded-For passed in the HTTP query."}, + {"quota_key", std::make_shared(), "The quota key specified in the quotas setting (see keyed)."}, + {"distributed_depth", std::make_shared(), "How many times a query was forwarded between servers."}, - {"revision", std::make_shared()}, + {"revision", std::make_shared(), "ClickHouse revision."}, - {"log_comment", std::make_shared()}, + {"log_comment", std::make_shared(), "Log comment. It can be set to an arbitrary string no longer than max_query_size. 
An empty string if it is not defined."}, - {"thread_ids", std::make_shared(std::make_shared())}, - {"peak_threads_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared())}, - {"Settings", std::make_shared(low_cardinality_string, low_cardinality_string)}, + {"thread_ids", std::make_shared(std::make_shared()), "Thread IDs that participated in query execution. These threads may not have run simultaneously."}, + {"peak_threads_usage", std::make_shared(), "Maximum count of simultaneous threads executing the query."}, + {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared()), "ProfileEvents that measure different metrics. Their descriptions can be found in the table system.events."}, + {"Settings", std::make_shared(low_cardinality_string, low_cardinality_string), "Settings that were changed when the client ran the query. To enable logging changes to settings, set the log_query_settings parameter to 1."}, - {"used_aggregate_functions", array_low_cardinality_string}, - {"used_aggregate_function_combinators", array_low_cardinality_string}, - {"used_database_engines", array_low_cardinality_string}, - {"used_data_type_families", array_low_cardinality_string}, - {"used_dictionaries", array_low_cardinality_string}, - {"used_formats", array_low_cardinality_string}, - {"used_functions", array_low_cardinality_string}, - {"used_storages", array_low_cardinality_string}, - {"used_table_functions", array_low_cardinality_string}, + {"used_aggregate_functions", array_low_cardinality_string, "Canonical names of aggregate functions, which were used during query execution."}, + {"used_aggregate_function_combinators", array_low_cardinality_string, "Canonical names of aggregate function combinators, which were used during query execution."}, + {"used_database_engines", array_low_cardinality_string, "Canonical names of database engines, which were used during query execution."}, + {"used_data_type_families", array_low_cardinality_string, "Canonical names of data type families, which were used during query execution."}, + {"used_dictionaries", array_low_cardinality_string, "Canonical names of dictionaries, which were used during query execution."}, + {"used_formats", array_low_cardinality_string, "Canonical names of formats, which were used during query execution."}, + {"used_functions", array_low_cardinality_string, "Canonical names of functions, which were used during query execution."}, + {"used_storages", array_low_cardinality_string, "Canonical names of storages, which were used during query execution."}, + {"used_table_functions", array_low_cardinality_string, "Canonical names of table functions, which were used during query execution."}, {"used_row_policies", array_low_cardinality_string}, {"transaction_id", getTransactionIDDataType()}, - {"query_cache_usage", std::move(query_cache_usage_datatype)}, + {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache during query execution. 
Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."}, {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())}, }; diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index fe9b7cbdbc8..be5cb5835c5 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace ProfileEvents @@ -102,7 +103,7 @@ struct QueryLogElement static std::string name() { return "QueryLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index eed2a38e6da..d153e30a4ce 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -22,11 +22,11 @@ namespace DB { -NamesAndTypesList QueryThreadLogElement::getNamesAndTypes() +ColumnsDescription QueryThreadLogElement::getColumnsDescription() { auto low_cardinality_string = std::make_shared(std::make_shared()); - return + return ColumnsDescription { {"hostname", low_cardinality_string}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index 684d7fce53e..fcce9232dc1 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -5,7 +5,7 @@ #include #include #include - +#include namespace DB { @@ -46,7 +46,7 @@ struct QueryThreadLogElement static std::string name() { return "QueryThreadLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/QueryViewsLog.cpp b/src/Interpreters/QueryViewsLog.cpp index 7ad3e668bbb..c426f2d3cf0 100644 --- a/src/Interpreters/QueryViewsLog.cpp +++ b/src/Interpreters/QueryViewsLog.cpp @@ -19,7 +19,7 @@ namespace DB { -NamesAndTypesList QueryViewsLogElement::getNamesAndTypes() +ColumnsDescription QueryViewsLogElement::getColumnsDescription() { auto view_status_datatype = std::make_shared(DataTypeEnum8::Values{ {"QueryStart", static_cast(QUERY_START)}, @@ -33,7 +33,8 @@ NamesAndTypesList QueryViewsLogElement::getNamesAndTypes() {"Live", static_cast(ViewType::LIVE)}, {"Window", static_cast(ViewType::WINDOW)}}); - return { + return ColumnsDescription + { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, @@ -57,7 +58,8 @@ NamesAndTypesList QueryViewsLogElement::getNamesAndTypes() {"status", std::move(view_status_datatype)}, {"exception_code", std::make_shared()}, {"exception", std::make_shared()}, - {"stack_trace", std::make_shared()}}; + {"stack_trace", std::make_shared()} + }; } NamesAndAliases QueryViewsLogElement::getNamesAndAliases() diff --git a/src/Interpreters/QueryViewsLog.h b/src/Interpreters/QueryViewsLog.h index e28bce0b91c..000d0bd385a 100644 --- a/src/Interpreters/QueryViewsLog.h +++ b/src/Interpreters/QueryViewsLog.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace ProfileEvents { 
@@ -77,7 +78,7 @@ struct QueryViewsLogElement static std::string name() { return "QueryLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/S3QueueLog.cpp index fdf74b2b926..967becb6e0f 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/S3QueueLog.cpp @@ -14,7 +14,7 @@ namespace DB { -NamesAndTypesList S3QueueLogElement::getNamesAndTypes() +ColumnsDescription S3QueueLogElement::getColumnsDescription() { auto status_datatype = std::make_shared( DataTypeEnum8::Values @@ -22,7 +22,9 @@ NamesAndTypesList S3QueueLogElement::getNamesAndTypes() {"Processed", static_cast(S3QueueLogElement::S3QueueStatus::Processed)}, {"Failed", static_cast(S3QueueLogElement::S3QueueStatus::Failed)}, }); - return { + + return ColumnsDescription + { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/S3QueueLog.h index 76ff5ca0cdc..e0362bf9716 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/S3QueueLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -28,7 +29,7 @@ struct S3QueueLogElement static std::string name() { return "S3QueueLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index c91329c869c..1e08aec3813 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -33,14 +33,6 @@ struct SelectQueryOptions bool remove_duplicates = false; bool ignore_quota = false; bool ignore_limits = false; - /// This flag is needed to analyze query ignoring table projections. - /// It is needed because we build another one InterpreterSelectQuery while analyzing projections. - /// It helps to avoid infinite recursion. - bool ignore_projections = false; - /// This flag is also used for projection analysis. - /// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform. - /// It is also used to avoid adding aggregating step when aggregate projection is chosen. - bool is_projection_query = false; /// This flag is needed for projection description. /// Otherwise, keys for GROUP BY may be removed as constants. bool ignore_ast_optimizations = false; @@ -119,18 +111,6 @@ struct SelectQueryOptions return *this; } - SelectQueryOptions & ignoreProjections(bool value = true) - { - ignore_projections = value; - return *this; - } - - SelectQueryOptions & projectionQuery(bool value = true) - { - is_projection_query = value; - return *this; - } - SelectQueryOptions & ignoreAlias(bool value = true) { ignore_alias = value; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 162772061b5..d2f9fe8b325 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -112,8 +112,7 @@ public: throw Exception(ErrorCodes::SESSION_NOT_FOUND, "Session {} not found", session_id); /// Create a new session from current context. 
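/// Aside (illustrative, standalone sketch; not part of this patch): the "Session {} is locked by
/// a concurrent client" check a few lines below relies on the sessions map holding the only
/// shared_ptr to an idle session. A minimal model of that ownership check:

#include <cassert>
#include <memory>

int main()
{
    auto stored = std::make_shared<int>(42);   // the copy kept inside the sessions map
    {
        auto in_use = stored;                  // another client still holds the session
        assert(stored.use_count() > 1);        // the storage would report the session as locked
    }
    assert(stored.use_count() == 1);           // sole owner again, safe to hand the session out
}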
- auto context = Context::createCopy(global_context); - it = sessions.insert(std::make_pair(key, std::make_shared(key, context, timeout, *this))).first; + it = sessions.insert(std::make_pair(key, std::make_shared(key, global_context, timeout, *this))).first; const auto & session = it->second; if (!thread.joinable()) @@ -128,7 +127,7 @@ public: /// Use existing session. const auto & session = it->second; - LOG_TEST(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); + LOG_TRACE(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); if (!session.unique()) throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session {} is locked by a concurrent client", session_id); @@ -703,6 +702,10 @@ void Session::releaseSessionID() { if (!named_session) return; + + prepared_client_info = getClientInfo(); + session_context.reset(); + named_session->release(); named_session = nullptr; } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 2249d8fbb2f..75e1414b8cb 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -8,6 +8,7 @@ #include #include +#include #include namespace Poco::Net { class SocketAddress; } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 61750b5acca..a5bc5012292 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -67,7 +67,7 @@ SessionLogElement::SessionLogElement(const UUID & auth_id_, Type type_) std::tie(event_time, event_time_microseconds) = eventTime(); } -NamesAndTypesList SessionLogElement::getNamesAndTypes() +ColumnsDescription SessionLogElement::getColumnsDescription() { auto event_type = std::make_shared( DataTypeEnum8::Values @@ -119,7 +119,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() std::make_shared() }))); - return + return ColumnsDescription { {"hostname", lc_string_datatype}, {"type", std::move(event_type)}, diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 8757bc12270..0f79a3e5ca7 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -59,7 +60,7 @@ struct SessionLogElement static std::string name() { return "SessionLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 4f283a3f78d..c06fe8f5c90 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -35,7 +35,9 @@ #include #include #include +#include #include +#include "Common/quoteString.h" #include #include #include @@ -118,7 +120,8 @@ std::shared_ptr createSystemLog( const String & default_database_name, const String & default_table_name, const Poco::Util::AbstractConfiguration & config, - const String & config_prefix) + const String & config_prefix, + const String & comment) { if (!config.has(config_prefix)) { @@ -208,10 +211,14 @@ std::shared_ptr createSystemLog( if (!settings.empty()) log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings; } + + /// Add comment to AST. So it will be saved when the table will be renamed. + log_settings.engine += fmt::format(" COMMENT {} ", quoteString(comment)); } /// Validate engine definition syntax to prevent some configuration errors. 
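/// Aside (illustrative, standalone sketch): the COMMENT clause is appended above as plain text to
/// the engine definition, which is then re-parsed purely as a syntax check. quoteSketch below is a
/// hypothetical stand-in for the quoteString helper used in the patch, and the engine string is
/// only an approximation of a real system table definition.

#include <iostream>
#include <string>

static std::string quoteSketch(const std::string & s)
{
    std::string out = "'";
    for (char c : s)
    {
        if (c == '\'' || c == '\\')
            out += '\\';
        out += c;
    }
    return out + "'";
}

int main()
{
    std::string engine = "ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time)";
    engine += " COMMENT " + quoteSketch("Contains information about executed queries.") + " ";
    std::cout << engine << '\n';   // ... ORDER BY (event_date, event_time) COMMENT 'Contains information about executed queries.'
}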
ParserStorageWithComment storage_parser; + parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(), "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -267,32 +274,32 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config) { - query_log = createSystemLog(global_context, "system", "query_log", config, "query_log"); - query_thread_log = createSystemLog(global_context, "system", "query_thread_log", config, "query_thread_log"); - part_log = createSystemLog(global_context, "system", "part_log", config, "part_log"); - trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log"); - crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log"); - text_log = createSystemLog(global_context, "system", "text_log", config, "text_log"); - metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log"); - filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log"); + query_log = createSystemLog(global_context, "system", "query_log", config, "query_log", "Contains information about executed queries, for example, start time, duration of processing, error messages."); + query_thread_log = createSystemLog(global_context, "system", "query_thread_log", config, "query_thread_log", "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing."); + part_log = createSystemLog(global_context, "system", "part_log", config, "part_log", "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data."); + trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log", "Contains stack traces collected by the sampling query profiler."); + crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains information about stack traces for fatal errors. 
The table does not exist in the database by default, it is created only when fatal errors occur."); + text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); + metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains the history of metric values from the tables system.metrics and system.events, periodically flushed to disk."); + filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events that occurred with the filesystem cache for objects on a remote filesystem."); filesystem_read_prefetches_log = createSystemLog( - global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log"); + global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done while reading from MergeTree tables backed by a remote filesystem."); asynchronous_metric_log = createSystemLog( global_context, "system", "asynchronous_metric_log", config, - "asynchronous_metric_log"); + "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, which are saved once per minute."); opentelemetry_span_log = createSystemLog( global_context, "system", "opentelemetry_span_log", config, - "opentelemetry_span_log"); - query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log"); - zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log"); - session_log = createSystemLog(global_context, "system", "session_log", config, "session_log"); + "opentelemetry_span_log", "Contains information about trace spans for executed queries."); + query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log", "Contains information about the dependent views executed when running a query, for example, the view type or the execution time."); + zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log", "This table contains information about the parameters of the request to the ZooKeeper server and the response from it."); + session_log = createSystemLog(global_context, "system", "session_log", config, "session_log", "Contains information about all successful and failed login and logout events."); transactions_info_log = createSystemLog( - global_context, "system", "transactions_info_log", config, "transactions_info_log"); - processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log"); - asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log"); - backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log"); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log"); - blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log"); + global_context, "system", "transactions_info_log", config, "transactions_info_log", "Contains information about all transactions executed on the current server."); + processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information at the processor level 
(the building blocks of the query execution pipeline)."); + asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history of all asynchronous inserts executed on the current server."); + backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with information about BACKUP and RESTORE operations."); + s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with information about files processed by the S3Queue engine."); + blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); if (query_log) logs.emplace_back(query_log.get()); @@ -484,9 +491,9 @@ void SystemLog::flushImpl(const std::vector & to_flush, prepareTable(); ColumnsWithTypeAndName log_element_columns; - auto log_element_names_and_types = LogElement::getNamesAndTypes(); + auto log_element_names_and_types = LogElement::getColumnsDescription(); - for (const auto & name_and_type : log_element_names_and_types) + for (const auto & name_and_type : log_element_names_and_types.getAll()) log_element_columns.emplace_back(name_and_type.type, name_and_type.name); Block block(std::move(log_element_columns)); @@ -547,6 +554,8 @@ void SystemLog::prepareTable() if (old_create_query != create_query) { + /// TODO: Handle altering the comment, because otherwise the whole table will be renamed. + /// Rename the existing table. int suffix = 0; while (DatabaseCatalog::instance().isTableExist( @@ -626,22 +635,11 @@ ASTPtr SystemLog::getCreateTableQuery() create->setTable(table_id.table_name); auto new_columns_list = std::make_shared(); + auto ordinary_columns = LogElement::getColumnsDescription(); + auto alias_columns = LogElement::getNamesAndAliases(); + ordinary_columns.setAliases(alias_columns); - if (const char * custom_column_list = LogElement::getCustomColumnList()) - { - ParserColumnDeclarationList parser; - const Settings & settings = getContext()->getSettingsRef(); - - ASTPtr columns_list_raw = parseQuery(parser, custom_column_list, "columns declaration list", settings.max_query_size, settings.max_parser_depth); - new_columns_list->set(new_columns_list->columns, columns_list_raw); - } - else - { - auto ordinary_columns = LogElement::getNamesAndTypes(); - auto alias_columns = LogElement::getNamesAndAliases(); - - new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns)); - } + new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns)); create->set(create->columns_list, new_columns_list); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index d322af4329c..8c357e43be9 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -25,9 +25,9 @@ namespace DB /// fields static std::string name(); - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); + /// TODO: Remove this method, we can return aliases directly from getColumnsDescription(). 
static NamesAndAliases getNamesAndAliases(); - static const char * getCustomColumnList(); void appendToBlock(MutableColumns & columns) const; }; */ diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 5f3492f0871..efe3fd7f740 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -376,7 +376,8 @@ void TableJoin::addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, boo * For `JOIN ON expr1 == expr2` we will infer common type later in makeTableJoin, * when part of plan built and types of expression will be known. */ - inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage()); + bool require_strict_keys_match = isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE); + inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage(), require_strict_keys_match); if (auto it = left_type_map.find(col.name); it != left_type_map.end()) { @@ -560,7 +561,9 @@ TableJoin::createConvertingActions( NameToNameMap left_column_rename; NameToNameMap right_column_rename; - inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage()); + /// FullSortingMerge join algorithm doesn't support joining keys with different types (e.g. String and Nullable(String)) + bool require_strict_keys_match = isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE); + inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage(), require_strict_keys_match); if (!left_type_map.empty() || !right_type_map.empty()) { left_dag = applyKeyConvertToTable(left_sample_columns, left_type_map, JoinTableSide::Left, left_column_rename); @@ -614,11 +617,8 @@ TableJoin::createConvertingActions( } template -void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right) +void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool require_strict_keys_match) { - /// FullSortingMerge and PartialMerge join algorithms don't support joining keys with different types - /// (e.g. String and LowCardinality(String)) - bool require_strict_keys_match = isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE); if (!left_type_map.empty() || !right_type_map.empty()) return; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 247835d9c53..75e2342d1e9 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -218,7 +218,7 @@ private: /// Calculates common supertypes for corresponding join key columns. 
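/// For illustration (not taken from this patch): for UInt8 and Int32 join keys the common supertype
/// is Int32, while pairs such as String and Nullable(String) are exactly the case mentioned in the
/// TableJoin.cpp comment above - the full sorting merge join needs both sides converted to one key
/// type, which is what the new require_strict_keys_match flag requests.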
template - void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right); + void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool require_strict_keys_match); void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 2ea9b805a45..d6971bbac54 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -16,7 +16,7 @@ namespace DB { -NamesAndTypesList TextLogElement::getNamesAndTypes() +ColumnsDescription TextLogElement::getColumnsDescription() { auto priority_datatype = std::make_shared( DataTypeEnum8::Values @@ -32,27 +32,27 @@ NamesAndTypesList TextLogElement::getNamesAndTypes() {"Test", static_cast(Message::PRIO_TEST)}, }); - return + return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(), "Time of the entry."}, + {"event_time_microseconds", std::make_shared(6), "Time of the entry with microseconds precision."}, - {"thread_name", std::make_shared(std::make_shared())}, - {"thread_id", std::make_shared()}, + {"thread_name", std::make_shared(std::make_shared()), "Name of the thread from which the logging was done."}, + {"thread_id", std::make_shared(), "OS thread ID."}, - {"level", std::move(priority_datatype)}, - {"query_id", std::make_shared()}, - {"logger_name", std::make_shared(std::make_shared())}, - {"message", std::make_shared()}, + {"level", std::move(priority_datatype), "Entry level. Possible values: 1 or 'Fatal', 2 or 'Critical', 3 or 'Error', 4 or 'Warning', 5 or 'Notice', 6 or 'Information', 7 or 'Debug', 8 or 'Trace'."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"logger_name", std::make_shared(std::make_shared()), "Name of the logger (i.e. 
DDLWorker)."}, + {"message", std::make_shared(), "The message itself."}, - {"revision", std::make_shared()}, + {"revision", std::make_shared(), "ClickHouse revision."}, - {"source_file", std::make_shared(std::make_shared())}, - {"source_line", std::make_shared()}, + {"source_file", std::make_shared(std::make_shared()), "Source file from which the logging was done."}, + {"source_line", std::make_shared(), "Source line from which the logging was done."}, - {"message_format_string", std::make_shared(std::make_shared())}, + {"message_format_string", std::make_shared(std::make_shared()), "A format string that was used to format the message."}, }; } diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index bfeca324fde..cdb4de76722 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -30,7 +31,7 @@ struct TextLogElement std::string_view message_format_string; static std::string name() { return "TextLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index d52c3493eaa..26adb0cfc3f 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -25,9 +25,9 @@ const TraceDataType::Values TraceLogElement::trace_values = {"ProfileEvent", static_cast(TraceType::ProfileEvent)}, }; -NamesAndTypesList TraceLogElement::getNamesAndTypes() +ColumnsDescription TraceLogElement::getColumnsDescription() { - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 71aec0b50c4..f4cd29a7a2d 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -37,7 +38,7 @@ struct TraceLogElement ProfileEvents::Count increment{}; static std::string name() { return "TraceLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index 18a8b099ba4..e893be814ca 100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -18,7 +18,7 @@ namespace DB { -NamesAndTypesList TransactionsInfoLogElement::getNamesAndTypes() +ColumnsDescription TransactionsInfoLogElement::getColumnsDescription() { auto type_enum = std::make_shared( DataTypeEnum8::Values @@ -32,7 +32,7 @@ NamesAndTypesList TransactionsInfoLogElement::getNamesAndTypes() {"UnlockPart", static_cast(UNLOCK_PART)}, }); - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"type", std::move(type_enum)}, diff --git a/src/Interpreters/TransactionsInfoLog.h b/src/Interpreters/TransactionsInfoLog.h index fc3783b5916..0a607704e74 100644 --- a/src/Interpreters/TransactionsInfoLog.h +++ b/src/Interpreters/TransactionsInfoLog.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -39,7 +40,7 @@ struct TransactionsInfoLogElement 
String part_name; static std::string name() { return "TransactionsInfoLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 729e2ed6007..b740852b808 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -171,16 +171,13 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) /// copy shared pointer to args in order to ensure lifetime auto args_ast = function->arguments; - - /** remove function call and take a step back to ensure - * next iteration does not skip not yet processed data - */ - remove_expr_at_index(i); - - /// copy non-literal arguments + /// Replace function call in 'group_exprs' with non-literal arguments. + const auto & erase_position = group_exprs.begin() + i; + group_exprs.erase(erase_position); + const auto & insert_position = group_exprs.begin() + i; std::remove_copy_if( std::begin(args_ast->children), std::end(args_ast->children), - std::back_inserter(group_exprs), is_literal + std::inserter(group_exprs, insert_position), is_literal ); } else if (is_literal(group_exprs[i])) diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index b55a9f540c5..9cc31edfe56 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -57,7 +57,7 @@ DataTypePtr getCoordinationErrorCodesEnumType() }); } -NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() +ColumnsDescription ZooKeeperLogElement::getColumnsDescription() { auto type_enum = std::make_shared( DataTypeEnum8::Values @@ -120,7 +120,7 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"NOTCONNECTED", static_cast(Coordination::State::NOTCONNECTED)}, }); - return + return ColumnsDescription { {"hostname", std::make_shared(std::make_shared())}, {"type", std::move(type_enum)}, diff --git a/src/Interpreters/ZooKeeperLog.h b/src/Interpreters/ZooKeeperLog.h index d79b75ec85f..90d36d22a59 100644 --- a/src/Interpreters/ZooKeeperLog.h +++ b/src/Interpreters/ZooKeeperLog.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -68,7 +69,7 @@ struct ZooKeeperLogElement static std::string name() { return "ZooKeeperLog"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } void appendToBlock(MutableColumns & columns) const; static const char * getCustomColumnList() { return nullptr; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 63804d2d86f..4b5a6a84e17 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1010,7 +1010,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getUserName()); + QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1043,7 +1043,7 @@ static std::tuple executeQueryImpl( } } - interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); + interpreter = InterpreterFactory::instance().get(ast, context, 
SelectQueryOptions(stage).setInternal(internal)); const auto & query_settings = context->getSettingsRef(); if (context->getCurrentTransaction() && query_settings.throw_on_unsupported_query_inside_transaction) @@ -1123,7 +1123,8 @@ static std::tuple executeQueryImpl( { QueryCache::Key key( ast, res.pipeline.getHeader(), - context->getUserName(), settings.query_cache_share_between_users, + context->getUserID(), context->getCurrentRoles(), + settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), settings.query_cache_compress_entries); @@ -1434,11 +1435,12 @@ void executeQuery( const auto & compression_method_node = ast_query_with_output->compression->as(); compression_method = compression_method_node.value.safeGet(); } - + const auto & settings = context->getSettingsRef(); compressed_buffer = wrapWriteBufferWithCompressionMethod( std::make_unique(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT), chooseCompressionMethod(out_file, compression_method), - /* compression level = */ 3 + /* compression level = */ static_cast(settings.output_format_compression_level), + /* zstd_window_log = */ static_cast(settings.output_format_compression_zstd_window_log) ); } diff --git a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index fd023754abf..6f84a60f2af 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "Processors/Executors/PullingPipelineExecutor.h" #include @@ -29,6 +30,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) context->makeGlobalContext(); context->setApplicationType(Context::ApplicationType::LOCAL); + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 2e9ee0af724..1295a4d5a75 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -20,12 +20,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } -bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context) -{ - return settings.max_parallel_replicas > 1 && context.getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; -} - ASTPtr getCustomKeyFilterForParallelReplica( size_t replicas_count, size_t replica_num, @@ -34,7 +28,7 @@ ASTPtr getCustomKeyFilterForParallelReplica( const ColumnsDescription & columns, const ContextPtr & context) { - assert(replicas_count > 1); + chassert(replicas_count > 1); if (filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) { // first we do modulo with replica count diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h index c35f00f3dfd..1506c1992c0 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -9,9 +9,6 @@ namespace DB { - -bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context); - /// Get AST for filter created from custom_key /// replica_num is the number of the replica for which we 
are generating filter starting from 0 ASTPtr getCustomKeyFilterForParallelReplica( diff --git a/src/Interpreters/registerInterpreters.cpp b/src/Interpreters/registerInterpreters.cpp new file mode 100644 index 00000000000..481d0597a85 --- /dev/null +++ b/src/Interpreters/registerInterpreters.cpp @@ -0,0 +1,120 @@ +#include + +namespace DB +{ + +void registerInterpreterSelectQuery(InterpreterFactory & factory); +void registerInterpreterSelectQueryAnalyzer(InterpreterFactory & factory); +void registerInterpreterSelectWithUnionQuery(InterpreterFactory & factory); +void registerInterpreterSelectIntersectExceptQuery(InterpreterFactory & factory); +void registerInterpreterInsertQuery(InterpreterFactory & factory); +void registerInterpreterCreateQuery(InterpreterFactory & factory); +void registerInterpreterDropQuery(InterpreterFactory & factory); +void registerInterpreterUndropQuery(InterpreterFactory & factory); +void registerInterpreterRenameQuery(InterpreterFactory & factory); +void registerInterpreterShowTablesQuery(InterpreterFactory & factory); +void registerInterpreterShowColumnsQuery(InterpreterFactory & factory); +void registerInterpreterShowIndexesQuery(InterpreterFactory & factory); +void registerInterpreterShowSettingQuery(InterpreterFactory & factory); +void registerInterpreterShowEnginesQuery(InterpreterFactory & factory); +void registerInterpreterShowFunctionsQuery(InterpreterFactory & factory); +void registerInterpreterUseQuery(InterpreterFactory & factory); +void registerInterpreterSetQuery(InterpreterFactory & factory); +void registerInterpreterSetRoleQuery(InterpreterFactory & factory); +void registerInterpreterOptimizeQuery(InterpreterFactory & factory); +void registerInterpreterExistsQuery(InterpreterFactory & factory); +void registerInterpreterShowCreateQuery(InterpreterFactory & factory); +void registerInterpreterDescribeQuery(InterpreterFactory & factory); +void registerInterpreterDescribeCacheQuery(InterpreterFactory & factory); +void registerInterpreterExplainQuery(InterpreterFactory & factory); +void registerInterpreterShowProcesslistQuery(InterpreterFactory & factory); +void registerInterpreterAlterQuery(InterpreterFactory & factory); +void registerInterpreterAlterNamedCollectionQuery(InterpreterFactory & factory); +void registerInterpreterCheckQuery(InterpreterFactory & factory); +void registerInterpreterKillQueryQuery(InterpreterFactory & factory); +void registerInterpreterSystemQuery(InterpreterFactory & factory); +void registerInterpreterWatchQuery(InterpreterFactory & factory); +void registerInterpreterCreateUserQuery(InterpreterFactory & factory); +void registerInterpreterCreateRoleQuery(InterpreterFactory & factory); +void registerInterpreterCreateQuotaQuery(InterpreterFactory & factory); +void registerInterpreterCreateRowPolicyQuery(InterpreterFactory & factory); +void registerInterpreterCreateSettingsProfileQuery(InterpreterFactory & factory); +void registerInterpreterDropAccessEntityQuery(InterpreterFactory & factory); +void registerInterpreterMoveAccessEntityQuery(InterpreterFactory & factory); +void registerInterpreterDropNamedCollectionQuery(InterpreterFactory & factory); +void registerInterpreterGrantQuery(InterpreterFactory & factory); +void registerInterpreterShowCreateAccessEntityQuery(InterpreterFactory & factory); +void registerInterpreterShowGrantsQuery(InterpreterFactory & factory); +void registerInterpreterShowAccessEntitiesQuery(InterpreterFactory & factory); +void registerInterpreterShowAccessQuery(InterpreterFactory & factory); +void 
registerInterpreterShowPrivilegesQuery(InterpreterFactory & factory); +void registerInterpreterExternalDDLQuery(InterpreterFactory & factory); +void registerInterpreterTransactionControlQuery(InterpreterFactory & factory); +void registerInterpreterCreateFunctionQuery(InterpreterFactory & factory); +void registerInterpreterDropFunctionQuery(InterpreterFactory & factory); +void registerInterpreterCreateIndexQuery(InterpreterFactory & factory); +void registerInterpreterCreateNamedCollectionQuery(InterpreterFactory & factory); +void registerInterpreterDropIndexQuery(InterpreterFactory & factory); +void registerInterpreterBackupQuery(InterpreterFactory & factory); +void registerInterpreterDeleteQuery(InterpreterFactory & factory); + +void registerInterpreters() +{ + auto & factory = InterpreterFactory::instance(); + + registerInterpreterSelectQuery(factory); + registerInterpreterSelectQueryAnalyzer(factory); + registerInterpreterSelectWithUnionQuery(factory); + registerInterpreterSelectIntersectExceptQuery(factory); + registerInterpreterInsertQuery(factory); + registerInterpreterCreateQuery(factory); + registerInterpreterDropQuery(factory); + registerInterpreterUndropQuery(factory); + registerInterpreterRenameQuery(factory); + registerInterpreterShowTablesQuery(factory); + registerInterpreterShowColumnsQuery(factory); + registerInterpreterShowIndexesQuery(factory); + registerInterpreterShowSettingQuery(factory); + registerInterpreterShowEnginesQuery(factory); + registerInterpreterShowFunctionsQuery(factory); + registerInterpreterUseQuery(factory); + registerInterpreterSetQuery(factory); + registerInterpreterSetRoleQuery(factory); + registerInterpreterOptimizeQuery(factory); + registerInterpreterExistsQuery(factory); + registerInterpreterShowCreateQuery(factory); + registerInterpreterDescribeQuery(factory); + registerInterpreterDescribeCacheQuery(factory); + registerInterpreterExplainQuery(factory); + registerInterpreterShowProcesslistQuery(factory); + registerInterpreterAlterQuery(factory); + registerInterpreterAlterNamedCollectionQuery(factory); + registerInterpreterCheckQuery(factory); + registerInterpreterKillQueryQuery(factory); + registerInterpreterSystemQuery(factory); + registerInterpreterWatchQuery(factory); + registerInterpreterCreateUserQuery(factory); + registerInterpreterCreateRoleQuery(factory); + registerInterpreterCreateQuotaQuery(factory); + registerInterpreterCreateRowPolicyQuery(factory); + registerInterpreterCreateSettingsProfileQuery(factory); + registerInterpreterDropAccessEntityQuery(factory); + registerInterpreterMoveAccessEntityQuery(factory); + registerInterpreterDropNamedCollectionQuery(factory); + registerInterpreterGrantQuery(factory); + registerInterpreterShowCreateAccessEntityQuery(factory); + registerInterpreterShowGrantsQuery(factory); + registerInterpreterShowAccessEntitiesQuery(factory); + registerInterpreterShowAccessQuery(factory); + registerInterpreterShowPrivilegesQuery(factory); + registerInterpreterExternalDDLQuery(factory); + registerInterpreterTransactionControlQuery(factory); + registerInterpreterCreateFunctionQuery(factory); + registerInterpreterDropFunctionQuery(factory); + registerInterpreterCreateIndexQuery(factory); + registerInterpreterCreateNamedCollectionQuery(factory); + registerInterpreterDropIndexQuery(factory); + registerInterpreterBackupQuery(factory); + registerInterpreterDeleteQuery(factory); +} +} diff --git a/src/Interpreters/registerInterpreters.h b/src/Interpreters/registerInterpreters.h new file mode 100644 index 
00000000000..9f0c3bbec22 --- /dev/null +++ b/src/Interpreters/registerInterpreters.h @@ -0,0 +1,6 @@ +#pragma once + +namespace DB +{ +void registerInterpreters(); +} diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index 30b172ecbb8..c2854e2235c 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -4,17 +4,9 @@ #include #include #include +#include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 6976683678e..34a1ae6e8e0 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -5,17 +5,10 @@ #include #include #include +#include #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif namespace DB { diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 90d9ede7337..4bb1d2eef30 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -143,10 +143,19 @@ ASTPtr ASTProjectionSelectQuery::cloneToASTSelect() const if (groupBy()) select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, groupBy()->clone()); + /// Attach settings to prevent AST transformations. We already have ignored AST optimizations + /// for projection queries. Only remaining settings need to be added here. + /// + /// NOTE: `count_distinct_implementation` has already been selected during the creation of the + /// projection, so there will be no countDistinct(...) to rewrite in projection queries. + /// Ideally, we should aim for a unique and normalized query representation that remains + /// unchanged after the AST rewrite. For instance, we can add -OrEmpty, realIn as the default + /// behavior w.r.t -OrNull, nullIn. 
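/// For example (illustrative, not part of the patch): after this step the projection's cloned SELECT
/// carries an explicit SETTINGS clause that pins aggregate_functions_null_for_empty, transform_null_in
/// and legacy_column_name_of_tuple_literal to disabled, so later rewrites driven by those settings
/// cannot alter the stored projection query.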
auto settings_query = std::make_shared(); SettingsChanges settings_changes; - settings_changes.insertSetting("optimize_aggregators_of_group_by_keys", false); - settings_changes.insertSetting("optimize_group_by_function_keys", false); + settings_changes.insertSetting("aggregate_functions_null_for_empty", false); + settings_changes.insertSetting("transform_null_in", false); + settings_changes.insertSetting("legacy_column_name_of_tuple_literal", false); settings_query->changes = std::move(settings_changes); settings_query->is_standalone = false; select_query->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(settings_query)); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index fc26f5dee1c..908b5049bc9 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -46,6 +46,12 @@ public: WAIT_LOADING_PARTS, DROP_REPLICA, DROP_DATABASE_REPLICA, +#if USE_JEMALLOC + JEMALLOC_PURGE, + JEMALLOC_ENABLE_PROFILE, + JEMALLOC_DISABLE_PROFILE, + JEMALLOC_FLUSH_PROFILE, +#endif SYNC_REPLICA, SYNC_DATABASE_REPLICA, SYNC_TRANSACTION_LOG, diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index a6a7b2ee320..b0eec16f56f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -4,8 +4,8 @@ #include #include -#include #include +#include namespace DB @@ -135,12 +135,12 @@ bool ToDecimal::convertImpl(String & out, IParser::Pos & pos) res = getConvertedArgument(fn_name, pos); precision = 17; } - static const std::regex expr{"^[0-9]+e[+-]?[0-9]+"}; - bool is_string = std::any_of(res.begin(), res.end(), ::isalpha) && !(std::regex_match(res, expr)); + static const re2::RE2 expr("^[0-9]+e[+-]?[0-9]+"); + bool is_string = std::any_of(res.begin(), res.end(), ::isalpha) && !(re2::RE2::FullMatch(res, expr)); if (is_string) out = "NULL"; - else if (std::regex_match(res, expr)) + else if (re2::RE2::FullMatch(res, expr)) { auto exponential_pos = res.find('e'); if (res[exponential_pos + 1] == '+' || res[exponential_pos + 1] == '-') diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 9ddc009307a..8530fa6623d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -9,7 +10,6 @@ #include #include "Poco/String.h" #include -#include namespace DB { @@ -224,13 +224,14 @@ bool DatatypeDecimal::convertImpl(String & out, IParser::Pos & pos) --pos; arg = getArgument(fn_name, pos); - //NULL expr returns NULL not exception - static const std::regex expr{"^[0-9]+e[+-]?[0-9]+"}; - bool is_string = std::any_of(arg.begin(), arg.end(), ::isalpha) && Poco::toUpper(arg) != "NULL" && !(std::regex_match(arg, expr)); + /// NULL expr returns NULL not exception + static const re2::RE2 expr("^[0-9]+e[+-]?[0-9]+"); + assert(expr.ok()); + bool is_string = std::any_of(arg.begin(), arg.end(), ::isalpha) && Poco::toUpper(arg) != "NULL" && !(re2::RE2::FullMatch(arg, expr)); if (is_string) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse String as decimal Literal: {}", fn_name); - if (std::regex_match(arg, expr)) + if (re2::RE2::FullMatch(arg, expr)) { auto exponential_pos = arg.find('e'); if (arg[exponential_pos + 1] == '+' || arg[exponential_pos + 1] == '-') diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 18ac5164df9..e5f40ee604d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 556743e70ec..4914c3889dd 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -15,12 +15,13 @@ #include #include #include +#include #include -#include #include #include "gtest_common.h" #include + namespace { using namespace DB; @@ -71,14 +72,14 @@ TEST_P(ParserTest, parseQuery) if (input_text.starts_with("ATTACH")) { auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt().value_or(""); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } else { WriteBufferFromOwnString buf; formatAST(*ast->clone(), buf, false, false); String formatted_ast = buf.str(); - EXPECT_TRUE(std::regex_match(formatted_ast, std::regex(expected_ast))); + EXPECT_TRUE(re2::RE2::FullMatch(formatted_ast, expected_ast)); } } } diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp index 7710df94644..52d3ceb47e2 100644 --- a/src/Parsers/tests/gtest_common.cpp +++ b/src/Parsers/tests/gtest_common.cpp @@ -6,9 +6,11 @@ #include #include +#include + #include -#include + namespace { @@ -62,14 +64,14 @@ TEST_P(ParserKQLTest, parseKQLQuery) if (input_text.starts_with("ATTACH")) { auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt().value_or(""); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } else { DB::WriteBufferFromOwnString buf; formatAST(*ast->clone(), buf, false, false); String formatted_ast = buf.str(); - EXPECT_TRUE(std::regex_match(formatted_ast, std::regex(expected_ast))); + EXPECT_TRUE(re2::RE2::FullMatch(formatted_ast, expected_ast)); } } } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index a0c0fce4934..bbe138705f7 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -908,9 +908,17 @@ void addWindowSteps(QueryPlan & query_plan, * has suitable sorting. Also don't create sort steps when there are no * columns to sort by, because the sort nodes are confused by this. It * happens in case of `over ()`. + * Even if full_sort_description of both windows match, in case of different + * partitioning we need to add a SortingStep to reshuffle data in the streams. 
*/ - if (!window_description.full_sort_description.empty() && - (i == 0 || !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description))) + + bool need_sort = !window_description.full_sort_description.empty(); + if (need_sort && i != 0) + { + need_sort = !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description) + || (settings.max_threads != 1 && window_description.partition_by.size() != window_descriptions[i - 1].partition_by.size()); + } + if (need_sort) { SortingStep::Settings sort_settings(*query_context); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index f6569d998f1..552f25d7035 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -809,9 +809,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres else { if (auto * distributed = typeid_cast(storage.get()); - distributed && canUseCustomKey(settings, *distributed->getCluster(), *query_context)) + distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) { - table_expression_query_info.use_custom_key = true; planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); } } @@ -846,9 +845,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres query_context->getQueryContext()->addQueryAccessInfo( backQuoteIfNeed(local_storage_id.getDatabaseName()), local_storage_id.getFullTableName(), - columns_names, - {}, - {}); + columns_names); } } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index ba29cab5956..2df5915c72a 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -398,7 +398,7 @@ QueryTreeNodePtr replaceTableExpressionsWithDummyTables(const QueryTreeNodePtr & storage_dummy_columns.emplace_back(projection_column); } - storage_dummy = std::make_shared(StorageID{"dummy", "subquery_" + std::to_string(subquery_index)}, ColumnsDescription(storage_dummy_columns)); + storage_dummy = std::make_shared(StorageID{"dummy", "subquery_" + std::to_string(subquery_index)}, ColumnsDescription::fromNamesAndTypes(storage_dummy_columns)); ++subquery_index; } @@ -455,8 +455,7 @@ QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTyp SelectQueryInfo buildSelectQueryInfo(const QueryTreeNodePtr & query_tree, const PlannerContextPtr & planner_context) { SelectQueryInfo select_query_info; - select_query_info.original_query = queryNodeToSelectQuery(query_tree); - select_query_info.query = select_query_info.original_query; + select_query_info.query = queryNodeToSelectQuery(query_tree); select_query_info.query_tree = query_tree; select_query_info.planner_context = planner_context; return select_query_info; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index ab430fb6312..38df7ad0d0a 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include #include #include @@ -34,15 +36,6 @@ #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { namespace ErrorCodes diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp 
b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 2602f8b881d..316a84fe94f 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -603,18 +603,21 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( memcpy(buf, istr.position(), bytes_to_copy); buf[bytes_to_copy] = 0; - char * pos_double = buf; + /// Skip leading zeroes - we don't want any funny octal business + char * non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy); + + char * pos_double = non_zero_buf; errno = 0; - Float64 float_value = std::strtod(buf, &pos_double); - if (pos_double == buf || errno == ERANGE || float_value < 0) + Float64 float_value = std::strtod(non_zero_buf, &pos_double); + if (pos_double == non_zero_buf || errno == ERANGE || float_value < 0) return false; if (negative) float_value = -float_value; - char * pos_integer = buf; + char * pos_integer = non_zero_buf; errno = 0; - UInt64 uint_value = std::strtoull(buf, &pos_integer, 0); + UInt64 uint_value = std::strtoull(non_zero_buf, &pos_integer, 0); if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63))) { istr.position() += pos_integer - buf; diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 6d8f1ab55cb..02ca2734ff8 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -448,6 +448,7 @@ PODArray & compress(PODArray & source, PODArray & scratch, Com std::move(dest_buf), method, /*level*/ 3, + /*zstd_window_log*/ 0, source.size(), /*existing_memory*/ source.data()); chassert(compressed_buf->position() == source.data()); diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index c32e08dad52..7612228f8c4 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -11,15 +12,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 433422a7c30..bc1b3695d88 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -1080,10 +1080,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, /// If we don't have filtration, we can pushdown limit to reading stage for optimizations. UInt64 limit = (select_query->hasFiltration() || select_query->groupBy()) ? 0 : InterpreterSelectQuery::getLimitForSorting(*select_query, context); - auto order_info = order_optimizer->getInputOrder( - query_info.projection ? 
query_info.projection->desc->metadata : read_from_merge_tree->getStorageMetadata(), - context, - limit); + auto order_info = order_optimizer->getInputOrder(read_from_merge_tree->getStorageMetadata(), context, limit); if (order_info) { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index d1f0c1ebe5e..c3e651154ae 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -590,7 +590,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & else if (!candidates.real.empty()) { auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); - size_t ordinary_reading_marks = ordinary_reading_select_result->marks(); + size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. if (ordinary_reading_marks == 0) @@ -599,7 +599,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & return false; } - const auto & parts_with_ranges = ordinary_reading_select_result->partsWithRanges(); + const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; /// Selecting best candidate. for (auto & candidate : candidates.real) @@ -615,7 +615,6 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & reader, required_column_names, parts_with_ranges, - metadata, query_info, context, max_added_blocks, @@ -642,6 +641,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & return false; } + Context::QualifiedProjectionName projection_name; chassert(best_candidate != nullptr); QueryPlanStepPtr projection_reading; @@ -654,23 +654,19 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & // candidates.minmax_projection->block.dumpStructure()); Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); - projection_reading = std::make_unique( - std::move(pipe), - context, - query_info.is_internal - ? Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName - { - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = candidates.minmax_projection->candidate.projection->name, - }); + projection_reading = std::make_unique(std::move(pipe)); has_ordinary_parts = false; + + projection_name = Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = candidates.minmax_projection->candidate.projection->name, + }; } else { auto storage_snapshot = reading->getStorageSnapshot(); - auto proj_snapshot = std::make_shared( - storage_snapshot->storage, storage_snapshot->metadata, storage_snapshot->object_columns); + auto proj_snapshot = std::make_shared(storage_snapshot->storage, storage_snapshot->metadata); proj_snapshot->addProjection(best_candidate->projection); auto query_info_copy = query_info; @@ -693,23 +689,29 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & { auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames()); Pipe pipe(std::make_shared(std::move(header))); - projection_reading = std::make_unique( - std::move(pipe), - context, - query_info.is_internal - ? 
Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName - { - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = best_candidate->projection->name, - }); + projection_reading = std::make_unique(std::move(pipe)); } + projection_name = Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }; + has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; if (has_ordinary_parts) reading->setAnalyzedResult(std::move(best_candidate->merge_tree_ordinary_select_result_ptr)); } + if (!query_info.is_internal && context->hasQueryContext()) + { + context->getQueryContext()->addQueryAccessInfo(Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); + } + // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection reading header {}", // projection_reading->getOutputStream().header.dumpStructure()); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index e4b3e4f84ab..4e2fa5b2389 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -142,7 +142,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) MergeTreeDataSelectExecutor reader(reading->getMergeTreeData()); auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); - size_t ordinary_reading_marks = ordinary_reading_select_result->marks(); + size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. if (ordinary_reading_marks == 0) @@ -151,7 +151,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) return false; } - const auto & parts_with_ranges = ordinary_reading_select_result->partsWithRanges(); + const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; std::shared_ptr max_added_blocks = getMaxAddedBlocks(reading); @@ -173,7 +173,6 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) reader, required_columns, parts_with_ranges, - metadata, query_info, context, max_added_blocks, @@ -196,8 +195,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) } auto storage_snapshot = reading->getStorageSnapshot(); - auto proj_snapshot = std::make_shared( - storage_snapshot->storage, storage_snapshot->metadata, storage_snapshot->object_columns); //, storage_snapshot->data); + auto proj_snapshot = std::make_shared(storage_snapshot->storage, storage_snapshot->metadata); proj_snapshot->addProjection(best_candidate->projection); auto query_info_copy = query_info; @@ -219,16 +217,16 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (!projection_reading) { Pipe pipe(std::make_shared(proj_snapshot->getSampleBlockForColumns(required_columns))); - projection_reading = std::make_unique( - std::move(pipe), - context, - query_info.is_internal - ? 
Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName - { - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = best_candidate->projection->name, - }); + projection_reading = std::make_unique(std::move(pipe)); + } + + if (!query_info.is_internal && context->hasQueryContext()) + { + context->getQueryContext()->addQueryAccessInfo(Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index c3b3449857b..1ac759df1d1 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -211,7 +211,6 @@ bool analyzeProjectionCandidate( const MergeTreeDataSelectExecutor & reader, const Names & required_column_names, const RangesInDataParts & parts_with_ranges, - const StorageMetadataPtr & metadata, const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, @@ -242,7 +241,6 @@ bool analyzeProjectionCandidate( std::move(projection_parts), nullptr, required_column_names, - metadata, candidate.projection->metadata, query_info, /// How it is actually used? I hope that for index we need only added_filter_nodes added_filter_nodes, @@ -250,23 +248,17 @@ bool analyzeProjectionCandidate( context->getSettingsRef().max_threads, max_added_blocks); - if (projection_result_ptr->error()) - return false; - candidate.merge_tree_projection_select_result_ptr = std::move(projection_result_ptr); - candidate.sum_marks += candidate.merge_tree_projection_select_result_ptr->marks(); + candidate.sum_marks += candidate.merge_tree_projection_select_result_ptr->selected_marks; if (!normal_parts.empty()) { /// TODO: We can reuse existing analysis_result by filtering out projection parts auto normal_result_ptr = reading.selectRangesToRead(std::move(normal_parts), std::move(alter_conversions)); - if (normal_result_ptr->error()) - return false; - - if (normal_result_ptr->marks() != 0) + if (normal_result_ptr->selected_marks != 0) { - candidate.sum_marks += normal_result_ptr->marks(); + candidate.sum_marks += normal_result_ptr->selected_marks; candidate.merge_tree_ordinary_select_result_ptr = std::move(normal_result_ptr); } } diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index 055ca5d4084..cc833a86925 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -1,31 +1,15 @@ #pragma once #include #include +#include namespace DB { -class ReadFromMergeTree; - using PartitionIdToMaxBlock = std::unordered_map; - struct ProjectionDescription; - class MergeTreeDataSelectExecutor; -struct MergeTreeDataSelectAnalysisResult; -using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr; - -class IMergeTreeDataPart; -using DataPartPtr = std::shared_ptr; -using DataPartsVector = std::vector; -struct RangesInDataParts; - -struct StorageInMemoryMetadata; -using StorageMetadataPtr = std::shared_ptr; - -struct SelectQueryInfo; - } namespace DB::QueryPlanOptimizations @@ -61,8 +45,8 @@ struct ProjectionCandidate /// Analysis result, separate for parts with and without projection. 
/// Analysis is done in order to estimate the number of marks we are going to read. /// For chosen projection, it is reused for reading step. - MergeTreeDataSelectAnalysisResultPtr merge_tree_projection_select_result_ptr; - MergeTreeDataSelectAnalysisResultPtr merge_tree_ordinary_select_result_ptr; + ReadFromMergeTree::AnalysisResultPtr merge_tree_projection_select_result_ptr; + ReadFromMergeTree::AnalysisResultPtr merge_tree_ordinary_select_result_ptr; }; /// This function fills ProjectionCandidate structure for specified projection. @@ -73,7 +57,6 @@ bool analyzeProjectionCandidate( const MergeTreeDataSelectExecutor & reader, const Names & required_column_names, const RangesInDataParts & parts_with_ranges, - const StorageMetadataPtr & metadata, const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 6ba6ed67456..7c66c0cc8df 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -228,7 +228,7 @@ struct SplitPartsRangesResult RangesInDataParts intersecting_parts_ranges; }; -SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, bool force_process_all_ranges) +SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts) { /** Split ranges in data parts into intersecting ranges in data parts and non intersecting ranges in data parts. * @@ -349,7 +349,7 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, if (previous_part_range.event == PartsRangesIterator::EventType::RangeStart) { /// If part level is 0, we must process whole previous part because it can contain duplicate primary keys - if (force_process_all_ranges || ranges_in_data_parts[previous_part_range.part_index].data_part->info.level == 0) + if (ranges_in_data_parts[previous_part_range.part_index].data_part->info.level == 0) continue; /// Case 1 Range Start after Range Start @@ -384,7 +384,7 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, MarkRange other_interval_range = other_interval_it->second; /// If part level is 0, we must process whole other intersecting part because it can contain duplicate primary keys - if (force_process_all_ranges || ranges_in_data_parts[other_interval_part_index].data_part->info.level == 0) + if (ranges_in_data_parts[other_interval_part_index].data_part->info.level == 0) continue; /// Case 2 Range Start after Range End @@ -419,7 +419,7 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, * * If part level is 0, we must process whole part because it can contain duplicate primary keys. 
*/ - if (intersecting_parts != 1 || force_process_all_ranges || ranges_in_data_parts[current_part_range.part_index].data_part->info.level == 0) + if (intersecting_parts != 1 || ranges_in_data_parts[current_part_range.part_index].data_part->info.level == 0) { add_intersecting_range(current_part_range.part_index, part_index_start_to_range[current_part_range.part_index]); part_index_start_to_range.erase(current_part_range.part_index); @@ -719,10 +719,16 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( SplitPartsWithRangesByPrimaryKeyResult result; - SplitPartsRangesResult split_result = splitPartsRanges(std::move(parts), force_process_all_ranges); - result.non_intersecting_parts_ranges = std::move(split_result.non_intersecting_parts_ranges); + RangesInDataParts intersecting_parts_ranges = std::move(parts); - auto && [layers, borders] = splitIntersectingPartsRangesIntoLayers(std::move(split_result.intersecting_parts_ranges), max_layers); + if (!force_process_all_ranges) + { + SplitPartsRangesResult split_result = splitPartsRanges(intersecting_parts_ranges); + result.non_intersecting_parts_ranges = std::move(split_result.non_intersecting_parts_ranges); + intersecting_parts_ranges = std::move(split_result.intersecting_parts_ranges); + } + + auto && [layers, borders] = splitIntersectingPartsRangesIntoLayers(intersecting_parts_ranges, max_layers); auto filters = buildFilters(primary_key, borders); result.merging_pipes.resize(layers.size()); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 68786bdec6c..74f48c45be2 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -124,12 +124,6 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( }; } -static const PrewhereInfoPtr & getPrewhereInfoFromQueryInfo(const SelectQueryInfo & query_info) -{ - return query_info.projection ? 
query_info.projection->prewhere_info - : query_info.prewhere_info; -} - static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) { for (const auto & part : parts) @@ -252,11 +246,11 @@ ReadFromMergeTree::ReadFromMergeTree( bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, Poco::Logger * log_, - MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_, + AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( storage_snapshot_->getSampleBlockForColumns(real_column_names_), - getPrewhereInfoFromQueryInfo(query_info_), + query_info_.prewhere_info, data_.getPartitionValueType(), virt_column_names_)}) , reader_settings(getMergeTreeReaderSettings(context_, query_info_)) @@ -266,7 +260,7 @@ ReadFromMergeTree::ReadFromMergeTree( , virt_column_names(std::move(virt_column_names_)) , data(data_) , query_info(query_info_) - , prewhere_info(getPrewhereInfoFromQueryInfo(query_info)) + , prewhere_info(query_info_.prewhere_info) , actions_settings(ExpressionActionsSettings::fromContext(context_)) , storage_snapshot(std::move(storage_snapshot_)) , metadata_for_reading(storage_snapshot->getMetadataForQuery()) @@ -321,7 +315,7 @@ ReadFromMergeTree::ReadFromMergeTree( *output_stream, storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), getSortDirection(), - query_info.getInputOrderInfo(), + query_info.input_order_info, prewhere_info); } @@ -1260,7 +1254,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( return merging_pipes.empty() ? Pipe::unitePipes(std::move(no_merging_pipes)) : Pipe::unitePipes(std::move(merging_pipes)); } -MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( +ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions) const { @@ -1269,7 +1263,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( std::move(alter_conversions), prewhere_info, filter_nodes, - storage_snapshot->metadata, metadata_for_reading, query_info, context, @@ -1460,12 +1453,11 @@ void ReadFromMergeTree::applyFilters() buildIndexes(indexes, filter_actions_dag, data, prepared_parts, context, query_info, metadata_for_reading); } -MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( +ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, const PrewhereInfoPtr & prewhere_info, const ActionDAGNodes & added_filter_nodes, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -1483,7 +1475,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( return selectRangesToReadImpl( std::move(parts), std::move(alter_conversions), - metadata_snapshot_base, metadata_snapshot, updated_query_info_with_filter_dag, context, @@ -1496,10 +1487,9 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( indexes); } -MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( +ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr 
context, @@ -1533,27 +1523,26 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( buildIndexes(indexes, query_info.filter_actions_dag, data, parts, context, query_info, metadata_snapshot); if (indexes->part_values && indexes->part_values->empty()) - return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); + return std::make_shared(std::move(result)); if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) { - return std::make_shared(MergeTreeDataSelectAnalysisResult{ - .result = std::make_exception_ptr(Exception( - ErrorCodes::INDEX_NOT_USED, - "Primary key ({}) is not used and setting 'force_primary_key' is set", - fmt::join(primary_key_column_names, ", ")))}); + throw Exception(ErrorCodes::INDEX_NOT_USED, + "Primary key ({}) is not used and setting 'force_primary_key' is set", + fmt::join(primary_key_column_names, ", ")); } + LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); if (indexes->part_offset_condition) LOG_DEBUG(log, "Part offset condition: {}", indexes->part_offset_condition->toString()); if (indexes->key_condition.alwaysFalse()) - return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); + return std::make_shared(std::move(result)); size_t total_marks_pk = 0; size_t parts_before_pk = 0; - try + { MergeTreeDataSelectExecutor::filterPartsByPartition( indexes->partition_pruner, @@ -1561,7 +1550,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( parts, alter_conversions, indexes->part_values, - metadata_snapshot_base, + metadata_snapshot, data, context, max_block_numbers_to_read.get(), @@ -1580,14 +1569,13 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( log); if (result.sampling.read_nothing) - return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); + return std::make_shared(std::move(result)); for (const auto & part : parts) total_marks_pk += part->index_granularity.getMarksCountWithoutFinal(); parts_before_pk = parts.size(); auto reader_settings = getMergeTreeReaderSettings(context, query_info); - result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( std::move(parts), std::move(alter_conversions), @@ -1602,10 +1590,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( result.index_stats, indexes->use_skip_indexes); } - catch (...) - { - return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::current_exception()}); - } size_t sum_marks_pk = total_marks_pk; for (const auto & stat : result.index_stats) @@ -1632,12 +1616,12 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( result.total_marks_pk = total_marks_pk; result.selected_rows = sum_rows; - const auto & input_order_info = query_info.getInputOrderInfo(); - if (input_order_info) - result.read_type = (input_order_info->direction > 0) ? ReadType::InOrder - : ReadType::InReverseOrder; + if (query_info.input_order_info) + result.read_type = (query_info.input_order_info->direction > 0) + ? 
ReadType::InOrder + : ReadType::InReverseOrder; - return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); + return std::make_shared(std::move(result)); } bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t limit) @@ -1651,12 +1635,7 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, if (direction != 1 && query_info.isFinal()) return false; - auto order_info = std::make_shared(SortDescription{}, prefix_size, direction, limit); - if (query_info.projection) - query_info.projection->input_order_info = order_info; - else - query_info.input_order_info = order_info; - + query_info.input_order_info = std::make_shared(SortDescription{}, prefix_size, direction, limit); reader_settings.read_in_order = true; /// In case or read-in-order, don't create too many reading streams. @@ -1678,7 +1657,7 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, } if (!sort_description.empty()) { - const size_t used_prefix_of_sorting_key_size = order_info->used_prefix_of_sorting_key_size; + const size_t used_prefix_of_sorting_key_size = query_info.input_order_info->used_prefix_of_sorting_key_size; if (sort_description.size() > used_prefix_of_sorting_key_size) sort_description.resize(used_prefix_of_sorting_key_size); output_stream->sort_description = std::move(sort_description); @@ -1708,7 +1687,7 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info *output_stream, storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), getSortDirection(), - query_info.getInputOrderInfo(), + query_info.input_order_info, prewhere_info); } @@ -1775,11 +1754,11 @@ bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const { - auto result_ptr = analyzed_result_ptr ? analyzed_result_ptr : selectRangesToRead(prepared_parts, alter_conversions_for_parts); - if (std::holds_alternative(result_ptr->result)) - std::rethrow_exception(std::get(result_ptr->result)); + auto result_ptr = analyzed_result_ptr + ? 
analyzed_result_ptr + : selectRangesToRead(prepared_parts, alter_conversions_for_parts); - return std::get(result_ptr->result); + return *result_ptr; } bool ReadFromMergeTree::isQueryWithFinal() const @@ -1803,8 +1782,6 @@ Pipe ReadFromMergeTree::spreadMarkRanges( RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection) { const bool final = isQueryWithFinal(); - const auto & input_order_info = query_info.getInputOrderInfo(); - Names column_names_to_read = result.column_names_to_read; NameSet names(column_names_to_read.begin(), column_names_to_read.end()); @@ -1845,10 +1822,10 @@ Pipe ReadFromMergeTree::spreadMarkRanges( return spreadMarkRangesAmongStreamsFinal(std::move(parts_with_ranges), num_streams, result.column_names_to_read, column_names_to_read, result_projection); } - else if (input_order_info) + else if (query_info.input_order_info) { return spreadMarkRangesAmongStreamsWithOrder( - std::move(parts_with_ranges), num_streams, column_names_to_read, result_projection, input_order_info); + std::move(parts_with_ranges), num_streams, column_names_to_read, result_projection, query_info.input_order_info); } else { @@ -2248,33 +2225,5 @@ void ReadFromMergeTree::describeIndexes(JSONBuilder::JSONMap & map) const } } -bool MergeTreeDataSelectAnalysisResult::error() const -{ - return std::holds_alternative(result); -} - -size_t MergeTreeDataSelectAnalysisResult::marks() const -{ - if (std::holds_alternative(result)) - std::rethrow_exception(std::get(result)); - - return std::get(result).selected_marks; -} - -UInt64 MergeTreeDataSelectAnalysisResult::rows() const -{ - if (std::holds_alternative(result)) - std::rethrow_exception(std::get(result)); - - return std::get(result).selected_rows; -} - -const RangesInDataParts & MergeTreeDataSelectAnalysisResult::partsWithRanges() const -{ - if (std::holds_alternative(result)) - std::rethrow_exception(std::get(result)); - - return std::get(result).parts_with_ranges; -} } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index e2c38ebb251..58258f8db4d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -55,9 +55,6 @@ struct UsefulSkipIndexes std::vector merged_indices; }; -struct MergeTreeDataSelectAnalysisResult; -using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr; - /// This step is created to read from MergeTree* table. /// For now, it takes a list of parts and creates source from it. 
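For context on the hunks above: the removed MergeTreeDataSelectAnalysisResult stored a std::variant holding either an exception_ptr or the analysis result, and its accessors (error(), marks(), rows(), partsWithRanges()) rethrew lazily; after this change selectRangesToRead() returns a plain ReadFromMergeTree::AnalysisResultPtr and any error is thrown at analysis time. A minimal sketch of the two error-handling styles — the scaffolding names below are illustrative, not taken from ClickHouse:

#include <exception>
#include <memory>
#include <stdexcept>
#include <variant>

struct AnalysisResult { size_t selected_marks = 0; };

/// Old style: the result wraps either a value or a captured exception, so every accessor
/// must check and rethrow before the value can be used.
struct LazyErrorResult
{
    std::variant<std::exception_ptr, AnalysisResult> result;

    size_t marks() const
    {
        if (std::holds_alternative<std::exception_ptr>(result))
            std::rethrow_exception(std::get<std::exception_ptr>(result));
        return std::get<AnalysisResult>(result).selected_marks;
    }
};

/// New style: analysis throws immediately on error, callers simply read the fields
/// (e.g. result->selected_marks, result->parts_with_ranges in the hunks above).
using AnalysisResultPtr = std::shared_ptr<AnalysisResult>;

AnalysisResultPtr analyzeOrThrow(bool force_primary_key_violated)
{
    if (force_primary_key_violated)
        throw std::runtime_error("Primary key is not used and setting 'force_primary_key' is set");
    return std::make_shared<AnalysisResult>(AnalysisResult{.selected_marks = 42});
}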
class ReadFromMergeTree final : public SourceStepWithFilter @@ -108,6 +105,8 @@ public: void checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const; }; + using AnalysisResultPtr = std::shared_ptr; + ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, @@ -122,9 +121,8 @@ public: bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, Poco::Logger * log_, - MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_, - bool enable_parallel_reading - ); + AnalysisResultPtr analyzed_result_ptr_, + bool enable_parallel_reading); static constexpr auto name = "ReadFromMergeTree"; String getName() const override { return name; } @@ -157,12 +155,11 @@ public: std::optional> part_values; }; - static MergeTreeDataSelectAnalysisResultPtr selectRangesToRead( + static AnalysisResultPtr selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, const PrewhereInfoPtr & prewhere_info, const ActionDAGNodes & added_filter_nodes, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -174,7 +171,7 @@ public: Poco::Logger * log, std::optional & indexes); - MergeTreeDataSelectAnalysisResultPtr selectRangesToRead( + AnalysisResultPtr selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions) const; @@ -196,7 +193,7 @@ public: bool willOutputEachPartitionThroughSeparatePort() const { return output_each_partition_through_separate_port; } bool hasAnalyzedResult() const { return analyzed_result_ptr != nullptr; } - void setAnalyzedResult(MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); } + void setAnalyzedResult(AnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); } const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; } const std::vector & getAlterConvertionsForParts() const { return alter_conversions_for_parts; } @@ -209,10 +206,9 @@ public: void applyFilters() override; private: - static MergeTreeDataSelectAnalysisResultPtr selectRangesToReadImpl( + static AnalysisResultPtr selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -226,9 +222,8 @@ private: int getSortDirection() const { - const InputOrderInfoPtr & order_info = query_info.getInputOrderInfo(); - if (order_info) - return order_info->direction; + if (query_info.input_order_info) + return query_info.input_order_info->direction; return 1; } @@ -295,21 +290,11 @@ private: RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); ReadFromMergeTree::AnalysisResult getAnalysisResult() const; - MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr; + AnalysisResultPtr analyzed_result_ptr; bool is_parallel_reading_from_replicas; std::optional all_ranges_callback; std::optional read_task_callback; }; -struct MergeTreeDataSelectAnalysisResult -{ - std::variant result; - - bool error() const; - size_t marks() const; - UInt64 rows() const; - const RangesInDataParts & partsWithRanges() const; -}; - } diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp 
b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index e7b170f0f91..bf2e49727ed 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -6,30 +6,37 @@ namespace DB { -ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_) +ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_) : SourceStepWithFilter(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) - , context(std::move(context_)) - , qualified_projection_name(std::move(qualified_projection_name_)) { } void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - if (context && context->hasQueryContext()) - context->getQueryContext()->addQueryAccessInfo(qualified_projection_name); - for (const auto & processor : pipe.getProcessors()) processors.emplace_back(processor); pipeline.init(std::move(pipe)); } +ReadFromStorageStep::ReadFromStorageStep( + Pipe pipe_, + String storage_name, + ContextPtr context_, + const SelectQueryInfo & query_info_) + : ReadFromPreparedSource(std::move(pipe_)) + , context(std::move(context_)) + , query_info(query_info_) +{ + setStepDescription(storage_name); + + for (const auto & processor : pipe.getProcessors()) + processor->setStorageLimits(query_info.storage_limits); +} + void ReadFromStorageStep::applyFilters() { - if (!context) - return; - for (const auto & processor : pipe.getProcessors()) if (auto * source = dynamic_cast(processor.get())) source->setKeyCondition(filter_nodes.nodes, context); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 16e790273ea..2eea48553b3 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -13,36 +13,25 @@ namespace DB class ReadFromPreparedSource : public SourceStepWithFilter { public: - explicit ReadFromPreparedSource( - Pipe pipe_, ContextPtr context_ = nullptr, Context::QualifiedProjectionName qualified_projection_name_ = {}); + explicit ReadFromPreparedSource(Pipe pipe_); String getName() const override { return "ReadFromPreparedSource"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; protected: Pipe pipe; - ContextPtr context; - Context::QualifiedProjectionName qualified_projection_name; }; class ReadFromStorageStep : public ReadFromPreparedSource { public: - ReadFromStorageStep(Pipe pipe_, String storage_name, const SelectQueryInfo & query_info_, ContextPtr context_) - : ReadFromPreparedSource(std::move(pipe_), std::move(context_)), query_info(query_info_) - { - setStepDescription(storage_name); - - for (const auto & processor : pipe.getProcessors()) - processor->setStorageLimits(query_info.storage_limits); - } + ReadFromStorageStep(Pipe pipe_, String storage_name, ContextPtr context_, const SelectQueryInfo & query_info_); String getName() const override { return "ReadFromStorage"; } - void applyFilters() override; private: + ContextPtr context; SelectQueryInfo query_info; }; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 0d1fae0d239..56371a5de56 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -231,8 +232,6 @@ void ReadFromRemote::addPipe(Pipes & pipes, 
const ClusterProxy::SelectStreamFact add_extremes = context->getSettingsRef().extremes; } - String query_string = formattedAST(shard.query); - scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared(), "_shard_num"}}; @@ -254,29 +253,81 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact context->setSetting("cluster_for_parallel_replicas", cluster_name); } - auto remote_query_executor = std::make_shared( - shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); - remote_query_executor->setLogger(log); - - if (context->canUseTaskBasedParallelReplicas()) + /// parallel replicas custom key case + if (shard.shard_filter_generator) { - // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: - // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. - // The coordinator will return query result from the shard. - // Only one coordinator per shard is necessary. Therefore using PoolMode::GET_ONE to establish only one connection per shard. - // Using PoolMode::GET_MANY for this mode will(can) lead to instantiation of several coordinators (depends on max_parallel_replicas setting) - // each will execute parallel reading from replicas, so the query result will be multiplied by the number of created coordinators - remote_query_executor->setPoolMode(PoolMode::GET_ONE); + for (size_t i = 0; i < shard.shard_info.per_replica_pools.size(); ++i) + { + auto query = shard.query->clone(); + auto & select_query = query->as(); + auto shard_filter = shard.shard_filter_generator(i + 1); + if (shard_filter) + { + auto where_expression = select_query.where(); + if (where_expression) + shard_filter = makeASTFunction("and", where_expression, shard_filter); + + select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); + } + + const String query_string = formattedAST(query); + + if (!priority_func_factory.has_value()) + priority_func_factory = GetPriorityForLoadBalancing(LoadBalancing::ROUND_ROBIN, randomSeed()); + + GetPriorityForLoadBalancing::Func priority_func + = priority_func_factory->getPriorityFunc(LoadBalancing::ROUND_ROBIN, 0, shard.shard_info.pool->getPoolSize()); + + auto remote_query_executor = std::make_shared( + shard.shard_info.pool, + query_string, + output_stream->header, + context, + throttler, + scalars, + external_tables, + stage, + std::nullopt, + priority_func); + remote_query_executor->setLogger(log); + remote_query_executor->setPoolMode(PoolMode::GET_ONE); + + if (!table_func_ptr) + remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table); + + pipes.emplace_back( + createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); + addConvertingActions(pipes.back(), output_stream->header); + } } else - remote_query_executor->setPoolMode(PoolMode::GET_MANY); + { + const String query_string = formattedAST(shard.query); - if (!table_func_ptr) - remote_query_executor->setMainTable(shard.main_table ? 
shard.main_table : main_table); + auto remote_query_executor = std::make_shared( + shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); + remote_query_executor->setLogger(log); - pipes.emplace_back( - createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addConvertingActions(pipes.back(), output_stream->header); + if (context->canUseTaskBasedParallelReplicas()) + { + // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: + // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. + // The coordinator will return query result from the shard. + // Only one coordinator per shard is necessary. Therefore using PoolMode::GET_ONE to establish only one connection per shard. + // Using PoolMode::GET_MANY for this mode will(can) lead to instantiation of several coordinators (depends on max_parallel_replicas setting) + // each will execute parallel reading from replicas, so the query result will be multiplied by the number of created coordinators + remote_query_executor->setPoolMode(PoolMode::GET_ONE); + } + else + remote_query_executor->setPoolMode(PoolMode::GET_MANY); + + if (!table_func_ptr) + remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table); + + pipes.emplace_back( + createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); + addConvertingActions(pipes.back(), output_stream->header); + } } void ReadFromRemote::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index 35ae336a72c..f6fd7b3a630 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -60,6 +60,7 @@ private: Poco::Logger * log; UInt32 shard_count; const String cluster_name; + std::optional priority_func_factory; void addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); void addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index aec959233ea..5173b18c6bf 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -332,7 +332,7 @@ ReadFromSystemNumbersStep::ReadFromSystemNumbersStep( , storage{std::move(storage_)} , storage_snapshot{storage_snapshot_} , context{std::move(context_)} - , key_expression{KeyDescription::parse(column_names[0], storage_snapshot->getMetadataForQuery()->columns, context).expression} + , key_expression{KeyDescription::parse(column_names[0], storage_snapshot->metadata->columns, context).expression} , max_block_size{max_block_size_} , num_streams{num_streams_} , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 4877660dc3f..81225d1cdf2 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -265,6 +266,42 @@ namespace 
assert_cast(column).insertData(value.data(), value.size()); read_bytes_size += column.sizeOfValueIfFixed(); break; + case ValueType::vtPoint: + { + /// The value is 25 bytes: + /// 4 bytes for integer SRID (0) + /// 1 byte for integer byte order (1 = little-endian) + /// 4 bytes for integer type information (1 = Point) + /// 8 bytes for double-precision X coordinate + /// 8 bytes for double-precision Y coordinate + ReadBufferFromMemory payload(value.data(), value.size()); + String val; + payload.ignore(4); + + UInt8 endian; + readBinary(endian, payload); + + Int32 point_type; + readBinary(point_type, payload); + if (point_type != 1) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Only Point data type is supported"); + + Float64 x, y; + if (endian == 1) + { + readBinaryLittleEndian(x, payload); + readBinaryLittleEndian(y, payload); + } + else + { + readBinaryBigEndian(x, payload); + readBinaryBigEndian(y, payload); + } + + assert_cast(column).insert(Tuple({Field(x), Field(y)})); + read_bytes_size += value.size(); + break; + } default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported value type"); } diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 47d2c2c5cc6..d8888773054 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -732,14 +732,14 @@ void AggregatingTransform::initGenerate() { if (!skip_merging) { - auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants); + auto prepared_data = params->aggregator.prepareVariantsToMerge(std::move(many_data->variants)); auto prepared_data_ptr = std::make_shared(std::move(prepared_data)); processors.emplace_back( std::make_shared(params, std::move(prepared_data_ptr), max_threads)); } else { - auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants); + auto prepared_data = params->aggregator.prepareVariantsToMerge(std::move(many_data->variants)); Pipes pipes; for (auto & variant : prepared_data) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ab9b3a80f12..71d652e74d0 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -420,7 +420,11 @@ Chain buildPushingToViewsChain( if (!no_destination && context->hasQueryContext()) { context->getQueryContext()->addQueryAccessInfo( - backQuoteIfNeed(view_id.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", view_id.getFullTableName()); + backQuoteIfNeed(view_id.getDatabaseName()), + views_data->views.back().runtime_stats->target_name, + /*column_names=*/ {}); + + context->getQueryContext()->addViewAccessInfo(view_id.getFullTableName()); } } diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 46c6a77f60f..67a8fe5dcab 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -38,7 +38,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int BAD_ARGUMENTS; } void QueryPipelineBuilder::checkInitialized() @@ -358,7 +357,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped left->pipe.dropExtremes(); right->pipe.dropExtremes(); if ((left->getNumStreams() != 1 || right->getNumStreams() != 1) && join->getTableJoin().kind() == JoinKind::Paste) - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Paste JOIN requires sorted tables only"); + { + left->pipe.resize(1, true); + right->pipe.resize(1, true); + } else if (left->getNumStreams() != 1 || right->getNumStreams() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Join is supported only for pipelines with one output port"); diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 0a8df5d1d34..a43571c8114 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -43,13 +43,24 @@ namespace ErrorCodes } RemoteQueryExecutor::RemoteQueryExecutor( - const String & query_, const Block & header_, ContextPtr context_, - const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_) - : header(header_), query(query_), context(context_), scalars(scalars_) - , external_tables(external_tables_), stage(stage_) + const String & query_, + const Block & header_, + ContextPtr context_, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_, + GetPriorityForLoadBalancing::Func priority_func_) + : header(header_) + , query(query_) + , context(context_) + , scalars(scalars_) + , external_tables(external_tables_) + , stage(stage_) , extension(extension_) -{} + , priority_func(priority_func_) +{ +} RemoteQueryExecutor::RemoteQueryExecutor( Connection & connection, @@ -100,10 +111,16 @@ RemoteQueryExecutor::RemoteQueryExecutor( RemoteQueryExecutor::RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, - const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_) - : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) + const String & query_, + const Block & header_, + ContextPtr context_, + const ThrottlerPtr & throttler, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_, + GetPriorityForLoadBalancing::Func priority_func_) + : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_, priority_func_) { create_connections = [this, pool, throttler](AsyncCallback async_callback)->std::unique_ptr { @@ -117,7 +134,8 @@ RemoteQueryExecutor::RemoteQueryExecutor( if (main_table) table_to_check = std::make_shared(main_table.getQualifiedName()); - auto res = std::make_unique(pool, context, timeouts, throttler, pool_mode, table_to_check, std::move(async_callback)); + auto res = std::make_unique( + pool, context, timeouts, throttler, pool_mode, table_to_check, std::move(async_callback), priority_func); if (extension && extension->replica_info) res->setReplicaInfo(*extension->replica_info); return res; @@ -137,14 +155,16 @@ RemoteQueryExecutor::RemoteQueryExecutor( pool_mode, main_table.getQualifiedName(), std::move(async_callback), - skip_unavailable_endpoints); + skip_unavailable_endpoints, + priority_func); connection_entries.reserve(try_results.size()); for (auto & try_result : try_results) connection_entries.emplace_back(std::move(try_result.entry)); } else { - connection_entries = pool->getMany(timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints); + connection_entries = pool->getMany( + timeouts, current_settings, pool_mode, 
std::move(async_callback), skip_unavailable_endpoints, priority_func); } auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 49ea40bf4b6..5a8ccc2592b 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -50,6 +50,7 @@ public: std::shared_ptr task_iterator = nullptr; std::shared_ptr parallel_reading_coordinator = nullptr; std::optional replica_info = {}; + GetPriorityForLoadBalancing::Func priority_func; }; /// Takes already set connection. @@ -76,9 +77,15 @@ public: /// Takes a pool and gets one or several connections from it. RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, - const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); + const String & query_, + const Block & header_, + ContextPtr context_, + const ThrottlerPtr & throttler = nullptr, + const Scalars & scalars_ = Scalars(), + const Tables & external_tables_ = Tables(), + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, + std::optional extension_ = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); ~RemoteQueryExecutor(); @@ -191,9 +198,14 @@ public: private: RemoteQueryExecutor( - const String & query_, const Block & header_, ContextPtr context_, - const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_); + const String & query_, + const Block & header_, + ContextPtr context_, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_, + GetPriorityForLoadBalancing::Func priority_func = {}); Block header; Block totals; @@ -273,6 +285,8 @@ private: Poco::Logger * log = nullptr; + GetPriorityForLoadBalancing::Func priority_func; + /// Send all scalars to remote servers void sendScalars(); diff --git a/src/Server/HTTP/HTTPRequestHandler.h b/src/Server/HTTP/HTTPRequestHandler.h index 19340866bb7..7902e86e3ed 100644 --- a/src/Server/HTTP/HTTPRequestHandler.h +++ b/src/Server/HTTP/HTTPRequestHandler.h @@ -13,7 +13,8 @@ class HTTPRequestHandler : private boost::noncopyable public: virtual ~HTTPRequestHandler() = default; - virtual void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) = 0; + virtual void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) = 0; + virtual void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { handleRequest(request, response, ProfileEvents::end()); } }; } diff --git a/src/Server/HTTP/HTTPServer.cpp b/src/Server/HTTP/HTTPServer.cpp index 46734933263..90bdebf6451 100644 --- a/src/Server/HTTP/HTTPServer.cpp +++ b/src/Server/HTTP/HTTPServer.cpp @@ -10,8 +10,10 @@ HTTPServer::HTTPServer( HTTPRequestHandlerFactoryPtr factory_, Poco::ThreadPool & thread_pool, Poco::Net::ServerSocket & socket_, - Poco::Net::HTTPServerParams::Ptr params) - : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), thread_pool, socket_, params), factory(factory_) + Poco::Net::HTTPServerParams::Ptr params, + const ProfileEvents::Event & read_event, + const ProfileEvents::Event & write_event) + : 
TCPServer(new HTTPServerConnectionFactory(context, params, factory_, read_event, write_event), thread_pool, socket_, params), factory(factory_) { } diff --git a/src/Server/HTTP/HTTPServer.h b/src/Server/HTTP/HTTPServer.h index adfb21e7c62..9911cde1b93 100644 --- a/src/Server/HTTP/HTTPServer.h +++ b/src/Server/HTTP/HTTPServer.h @@ -20,7 +20,9 @@ public: HTTPRequestHandlerFactoryPtr factory, Poco::ThreadPool & thread_pool, Poco::Net::ServerSocket & socket, - Poco::Net::HTTPServerParams::Ptr params); + Poco::Net::HTTPServerParams::Ptr params, + const ProfileEvents::Event & read_event_ = ProfileEvents::end(), + const ProfileEvents::Event & write_event_ = ProfileEvents::end()); ~HTTPServer() override; diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp index 042f5e2e5df..047db014560 100644 --- a/src/Server/HTTP/HTTPServerConnection.cpp +++ b/src/Server/HTTP/HTTPServerConnection.cpp @@ -11,8 +11,10 @@ HTTPServerConnection::HTTPServerConnection( TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket, Poco::Net::HTTPServerParams::Ptr params_, - HTTPRequestHandlerFactoryPtr factory_) - : TCPServerConnection(socket), context(std::move(context_)), tcp_server(tcp_server_), params(params_), factory(factory_), stopped(false) + HTTPRequestHandlerFactoryPtr factory_, + const ProfileEvents::Event & read_event_, + const ProfileEvents::Event & write_event_) + : TCPServerConnection(socket), context(std::move(context_)), tcp_server(tcp_server_), params(params_), factory(factory_), read_event(read_event_), write_event(write_event_), stopped(false) { poco_check_ptr(factory); } @@ -30,7 +32,7 @@ void HTTPServerConnection::run() if (!stopped && tcp_server.isOpen() && session.connected()) { HTTPServerResponse response(session); - HTTPServerRequest request(context, response, session); + HTTPServerRequest request(context, response, session, read_event); Poco::Timestamp now; @@ -65,7 +67,7 @@ void HTTPServerConnection::run() if (request.getExpectContinue() && response.getStatus() == Poco::Net::HTTPResponse::HTTP_OK) response.sendContinue(); - handler->handleRequest(request, response); + handler->handleRequest(request, response, write_event); session.setKeepAlive(params->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive()); } else diff --git a/src/Server/HTTP/HTTPServerConnection.h b/src/Server/HTTP/HTTPServerConnection.h index 7087f8d5a21..c6b1dc1ba25 100644 --- a/src/Server/HTTP/HTTPServerConnection.h +++ b/src/Server/HTTP/HTTPServerConnection.h @@ -19,7 +19,9 @@ public: TCPServer & tcp_server, const Poco::Net::StreamSocket & socket, Poco::Net::HTTPServerParams::Ptr params, - HTTPRequestHandlerFactoryPtr factory); + HTTPRequestHandlerFactoryPtr factory, + const ProfileEvents::Event & read_event_ = ProfileEvents::end(), + const ProfileEvents::Event & write_event_ = ProfileEvents::end()); HTTPServerConnection( HTTPContextPtr context_, @@ -27,8 +29,10 @@ public: const Poco::Net::StreamSocket & socket_, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_, - const String & forwarded_for_) - : HTTPServerConnection(context_, tcp_server_, socket_, params_, factory_) + const String & forwarded_for_, + const ProfileEvents::Event & read_event_ = ProfileEvents::end(), + const ProfileEvents::Event & write_event_ = ProfileEvents::end()) + : HTTPServerConnection(context_, tcp_server_, socket_, params_, factory_, read_event_, write_event_) { forwarded_for = forwarded_for_; } @@ -44,6 +48,8 @@ private: Poco::Net::HTTPServerParams::Ptr 
params; HTTPRequestHandlerFactoryPtr factory; String forwarded_for; + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; bool stopped; std::mutex mutex; // guards the |factory| with assumption that creating handlers is not thread-safe. }; diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.cpp b/src/Server/HTTP/HTTPServerConnectionFactory.cpp index 2c9ac0cda2a..16e5160fe3f 100644 --- a/src/Server/HTTP/HTTPServerConnectionFactory.cpp +++ b/src/Server/HTTP/HTTPServerConnectionFactory.cpp @@ -5,20 +5,20 @@ namespace DB { HTTPServerConnectionFactory::HTTPServerConnectionFactory( - HTTPContextPtr context_, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_) - : context(std::move(context_)), params(params_), factory(factory_) + HTTPContextPtr context_, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) + : context(std::move(context_)), params(params_), factory(factory_), read_event(read_event_), write_event(write_event_) { poco_check_ptr(factory); } Poco::Net::TCPServerConnection * HTTPServerConnectionFactory::createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) { - return new HTTPServerConnection(context, tcp_server, socket, params, factory); + return new HTTPServerConnection(context, tcp_server, socket, params, factory, read_event, write_event); } Poco::Net::TCPServerConnection * HTTPServerConnectionFactory::createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData & stack_data) { - return new HTTPServerConnection(context, tcp_server, socket, params, factory, stack_data.forwarded_for); + return new HTTPServerConnection(context, tcp_server, socket, params, factory, stack_data.forwarded_for, read_event, write_event); } } diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.h b/src/Server/HTTP/HTTPServerConnectionFactory.h index e18249da4de..4b785e31744 100644 --- a/src/Server/HTTP/HTTPServerConnectionFactory.h +++ b/src/Server/HTTP/HTTPServerConnectionFactory.h @@ -12,7 +12,7 @@ namespace DB class HTTPServerConnectionFactory : public TCPServerConnectionFactory { public: - HTTPServerConnectionFactory(HTTPContextPtr context, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory); + HTTPServerConnectionFactory(HTTPContextPtr context, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()); Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override; Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData & stack_data) override; @@ -21,6 +21,8 @@ private: HTTPContextPtr context; Poco::Net::HTTPServerParams::Ptr params; HTTPRequestHandlerFactoryPtr factory; + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; }; } diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index de5dde3c4aa..4a6e85ba0fb 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -22,7 +22,7 @@ namespace DB { -HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session) 
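All of these HTTP hunks thread the same pair of optional ProfileEvents counters (read_event / write_event) from the server down to the socket buffers, using ProfileEvents::end() as the "no counter" default. A reduced, self-contained sketch of that idiom — the counter table and class below are stand-ins, only the default-argument forwarding pattern mirrors the diff:

#include <array>
#include <atomic>
#include <cstddef>

/// Stand-in for ProfileEvents: an event is just an index into a table of atomic counters,
/// and no_event plays the role of ProfileEvents::end() ("do not count anything").
using EventId = std::size_t;
inline constexpr EventId no_event = static_cast<EventId>(-1);
inline std::array<std::atomic<std::size_t>, 2> byte_counters{};   /// e.g. {read_bytes, write_bytes}

class CountingSocketWriteBuffer
{
public:
    explicit CountingSocketWriteBuffer(EventId write_event_ = no_event) : write_event(write_event_) {}

    void write(const char * /*data*/, std::size_t size)
    {
        /// ... actually send `size` bytes to the socket here ...
        if (write_event != no_event)
            byte_counters[write_event] += size;   /// charge the bytes to the caller-supplied counter
    }

private:
    EventId write_event;
};

/// The server owns the concrete events; every intermediate layer (connection factory,
/// connection, request/response) only stores and forwards them, exactly like the
/// read_event_/write_event_ parameters added to the constructors above.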
+HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session, const ProfileEvents::Event & read_event) : max_uri_size(context->getMaxUriSize()) , max_fields_number(context->getMaxFields()) , max_field_name_size(context->getMaxFieldNameSize()) @@ -41,7 +41,7 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse session.socket().setReceiveTimeout(receive_timeout); session.socket().setSendTimeout(send_timeout); - auto in = std::make_unique(session.socket()); + auto in = std::make_unique(session.socket(), read_event); socket = session.socket().impl(); readRequest(*in); /// Try parse according to RFC7230 diff --git a/src/Server/HTTP/HTTPServerRequest.h b/src/Server/HTTP/HTTPServerRequest.h index 1f38334c745..aaec89ab757 100644 --- a/src/Server/HTTP/HTTPServerRequest.h +++ b/src/Server/HTTP/HTTPServerRequest.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "config.h" #include @@ -19,7 +20,7 @@ class ReadBufferFromPocoSocket; class HTTPServerRequest : public HTTPRequest { public: - HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session); + HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session, const ProfileEvents::Event & read_event = ProfileEvents::end()); /// FIXME: it's a little bit inconvenient interface. The rationale is that all other ReadBuffer's wrap each other /// via unique_ptr - but we can't inherit HTTPServerRequest from ReadBuffer and pass it around, diff --git a/src/Server/HTTP/HTTPServerResponse.cpp b/src/Server/HTTP/HTTPServerResponse.cpp index 25e7604a515..3c2d54a67df 100644 --- a/src/Server/HTTP/HTTPServerResponse.cpp +++ b/src/Server/HTTP/HTTPServerResponse.cpp @@ -9,12 +9,15 @@ #include #include #include +#include namespace DB { -HTTPServerResponse::HTTPServerResponse(Poco::Net::HTTPServerSession & session_) : session(session_) +HTTPServerResponse::HTTPServerResponse(Poco::Net::HTTPServerSession & session_, const ProfileEvents::Event & write_event_) + : session(session_) + , write_event(write_event_) { } @@ -24,42 +27,45 @@ void HTTPServerResponse::sendContinue() hs << getVersion() << " 100 Continue\r\n\r\n"; } -std::shared_ptr HTTPServerResponse::send() +std::shared_ptr HTTPServerResponse::send() { poco_assert(!stream); if ((request && request->getMethod() == HTTPRequest::HTTP_HEAD) || getStatus() < 200 || getStatus() == HTTPResponse::HTTP_NO_CONTENT || getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) { - Poco::CountingOutputStream cs; - write(cs); - stream = std::make_shared(session, cs.chars()); - write(*stream); + // Send header + Poco::Net::HTTPHeaderOutputStream hs(session); + write(hs); + stream = std::make_shared(session.socket(), write_event); } else if (getChunkedTransferEncoding()) { + // Send header Poco::Net::HTTPHeaderOutputStream hs(session); write(hs); - stream = std::make_shared(session); + stream = std::make_shared(session.socket(), write_event); } else if (hasContentLength()) { - Poco::CountingOutputStream cs; - write(cs); - stream = std::make_shared(session, getContentLength64() + cs.chars()); - write(*stream); + // Send header + Poco::Net::HTTPHeaderOutputStream hs(session); + write(hs); + stream = std::make_shared(session.socket(), getContentLength(), write_event); } else { - stream = std::make_shared(session); setKeepAlive(false); - write(*stream); + // Send header + Poco::Net::HTTPHeaderOutputStream hs(session); + write(hs); + 
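With the switch to socket-backed write buffers, HTTPServerResponse::send() and beginSend() first serialize the status line and headers (via a Poco header stream or beginWrite() into a string), push them to the socket, and only then construct the body buffer on the same socket. The sketch below shows only that first step in a simplified form; serializeResponseHead is a hypothetical helper, not the Poco API.

#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Simplified sketch (not the Poco/ClickHouse code): serialize the status line and
// headers into one string, terminated by the blank line, so it can be pushed to
// the socket in a single write before a body buffer takes over the same socket.
std::string serializeResponseHead(int status, const std::string & reason,
                                  const std::map<std::string, std::string> & headers)
{
    std::ostringstream out;
    out << "HTTP/1.1 " << status << ' ' << reason << "\r\n";
    for (const auto & [name, value] : headers)
        out << name << ": " << value << "\r\n";
    out << "\r\n";                       // end of headers; the body (if any) follows
    return out.str();
}

int main()
{
    std::string head = serializeResponseHead(200, "OK",
        {{"Content-Type", "text/plain; charset=UTF-8"},
         {"Transfer-Encoding", "chunked"}});
    std::cout << head;
}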
stream = std::make_shared(session.socket(), write_event); } return stream; } -std::pair, std::shared_ptr> HTTPServerResponse::beginSend() +std::pair, std::shared_ptr> HTTPServerResponse::beginSend() { poco_assert(!stream); poco_assert(!header_stream); @@ -71,40 +77,39 @@ std::pair, std::shared_ptr> HTTPServ { throw Poco::Exception("HTTPServerResponse::beginSend is invalid for HEAD request"); } - else if (getChunkedTransferEncoding()) - { - header_stream = std::make_shared(session); - beginWrite(*header_stream); - stream = std::make_shared(session); - } - else if (hasContentLength()) + + if (hasContentLength()) { throw Poco::Exception("HTTPServerResponse::beginSend is invalid for response with Content-Length header"); } + + // Write header to buffer + std::stringstream header; //STYLE_CHECK_ALLOW_STD_STRING_STREAM + beginWrite(header); + // Send header + auto str = header.str(); + header_stream = std::make_shared(session.socket(), write_event, str.size()); + header_stream->write(str); + + if (getChunkedTransferEncoding()) + stream = std::make_shared(session.socket(), write_event); else - { - stream = std::make_shared(session); - header_stream = stream; - setKeepAlive(false); - beginWrite(*stream); - } + stream = std::make_shared(session.socket(), write_event); return std::make_pair(header_stream, stream); } void HTTPServerResponse::sendBuffer(const void * buffer, std::size_t length) { - poco_assert(!stream); - setContentLength(static_cast(length)); setChunkedTransferEncoding(false); + // Send header + Poco::Net::HTTPHeaderOutputStream hs(session); + write(hs); + hs.flush(); - stream = std::make_shared(session); - write(*stream); if (request && request->getMethod() != HTTPRequest::HTTP_HEAD) - { - stream->write(static_cast(buffer), static_cast(length)); - } + WriteBufferFromPocoSocket(session.socket(), write_event).write(static_cast(buffer), length); } void HTTPServerResponse::requireAuthentication(const std::string & realm) diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index 236a56e2323..6efe48667eb 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -1,9 +1,12 @@ #pragma once +#include #include #include #include +#include +#include #include @@ -11,12 +14,182 @@ namespace DB { + +class HTTPWriteBufferChunked : public WriteBufferFromPocoSocket +{ + using WriteBufferFromPocoSocket::WriteBufferFromPocoSocket; +protected: + void nextImpl() override + { + if (offset() == 0) + return; + + std::string chunk_header; + Poco::NumberFormatter::appendHex(chunk_header, offset()); + chunk_header.append("\r\n", 2); + socketSendBytes(chunk_header.data(), static_cast(chunk_header.size())); + WriteBufferFromPocoSocket::nextImpl(); + socketSendBytes("\r\n", 2); + } + + void finalizeImpl() override + { + WriteBufferFromPocoSocket::finalizeImpl(); + socketSendBytes("0\r\n\r\n", 5); + } +}; + +class HTTPWriteBufferFixedLength : public WriteBufferFromPocoSocket +{ +public: + explicit HTTPWriteBufferFixedLength(Poco::Net::Socket & socket_, size_t fixed_length_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : WriteBufferFromPocoSocket(socket_, buf_size) + { + fixed_length = fixed_length_; + } + explicit HTTPWriteBufferFixedLength(Poco::Net::Socket & socket_, size_t fixed_length_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : WriteBufferFromPocoSocket(socket_, write_event_, buf_size) + { + fixed_length = fixed_length_; + } +protected: + void nextImpl() override + { + if (count_length >= 
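The new HTTPWriteBufferChunked frames every flushed buffer as one HTTP/1.1 chunk (the chunk size in hex, CRLF, the payload, CRLF) and appends the terminating zero-length chunk on finalize. Below is a standalone sketch of exactly that framing, writing into a string instead of a socket; writeChunk and finishChunked are illustrative names.

#include <cstdio>
#include <string>
#include <string_view>

// Standalone sketch of HTTP/1.1 chunked framing, as performed by the new
// HTTPWriteBufferChunked: every flush becomes "<hex size>\r\n<payload>\r\n",
// and finalization appends the zero-length terminating chunk.
void writeChunk(std::string & wire, std::string_view payload)
{
    if (payload.empty())
        return;                      // an empty chunk would terminate the body prematurely

    char size_hex[32];
    std::snprintf(size_hex, sizeof(size_hex), "%zx", payload.size());
    wire.append(size_hex);
    wire.append("\r\n");
    wire.append(payload);
    wire.append("\r\n");
}

void finishChunked(std::string & wire)
{
    wire.append("0\r\n\r\n");        // last chunk: zero length, no trailers
}

int main()
{
    std::string wire;
    writeChunk(wire, "hello, ");
    writeChunk(wire, "world");
    finishChunked(wire);
    std::fwrite(wire.data(), 1, wire.size(), stdout);
}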
fixed_length || offset() == 0) + return; + + if (count_length + offset() > fixed_length) + pos -= offset() - (fixed_length - count_length); + + count_length += offset(); + + WriteBufferFromPocoSocket::nextImpl(); + } +private: + size_t fixed_length; + size_t count_length = 0; +}; + +/// Universal HTTP buffer, can be switched for different Transfer-Encoding/Content-Length on the fly +/// so it can be used to output HTTP header and then switched to appropriate mode for body +class HTTPWriteBuffer : public WriteBufferFromPocoSocket +{ +public: + explicit HTTPWriteBuffer(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : WriteBufferFromPocoSocket(socket_, buf_size) + { + } + explicit HTTPWriteBuffer(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : WriteBufferFromPocoSocket(socket_, write_event_, buf_size) + { + } + + void setChunked(size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + { + chunked = true; + resizeIfNeeded(buf_size); + } + + bool isChunked() + { + return chunked; + } + + void setFixedLength(size_t length) + { + chunked = false; + fixed_length = length; + count_length = 0; + resizeIfNeeded(length); + } + + size_t isFixedLength() + { + return chunked ? 0 : fixed_length; + } + + void setPlain(size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + { + chunked = false; + fixed_length = 0; + count_length = 0; + resizeIfNeeded(buf_size); + } + + bool isPlain() + { + return !(isChunked() || isFixedLength()); + } + +protected: + void finalizeImpl() override + { + WriteBufferFromPocoSocket::finalizeImpl(); + if (chunked) + socketSendBytes("0\r\n\r\n", 5); + } + + void nextImpl() override + { + if (chunked) + return nextImplChunked(); + + if (fixed_length) + return nextImplFixedLength(); + + WriteBufferFromPocoSocket::nextImpl(); + } + + void nextImplFixedLength() + { + if (count_length >= fixed_length || offset() == 0) + return; + + if (count_length + offset() > fixed_length) + pos -= offset() - (fixed_length - count_length); + + count_length += offset(); + + WriteBufferFromPocoSocket::nextImpl(); + } + + void nextImplChunked() + { + if (offset() == 0) + return; + + std::string chunk_header; + Poco::NumberFormatter::appendHex(chunk_header, offset()); + chunk_header.append("\r\n", 2); + socketSendBytes(chunk_header.data(), static_cast(chunk_header.size())); + WriteBufferFromPocoSocket::nextImpl(); + socketSendBytes("\r\n", 2); + } + + void resizeIfNeeded(size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + { + if (!buf_size) + return; + + auto data_size = offset(); + assert(data_size <= buf_size); + + memory.resize(buf_size); + set(memory.data(), memory.size(), data_size); + } +private: + bool chunked = false; + size_t fixed_length = 0; + size_t count_length = 0; +}; + + class HTTPServerRequest; class HTTPServerResponse : public HTTPResponse { public: - explicit HTTPServerResponse(Poco::Net::HTTPServerSession & session); + explicit HTTPServerResponse(Poco::Net::HTTPServerSession & session, const ProfileEvents::Event & write_event_ = ProfileEvents::end()); void sendContinue(); /// Sends a 100 Continue response to the client. @@ -26,7 +199,7 @@ public: /// /// Must not be called after beginSend(), sendFile(), sendBuffer() /// or redirect() has been called. - std::shared_ptr send(); /// TODO: use some WriteBuffer implementation here. 
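HTTPWriteBufferFixedLength and the fixed-length mode of HTTPWriteBuffer guarantee that the body never exceeds the advertised Content-Length: once the declared number of bytes has been sent further flushes are dropped, and a flush that would overshoot is truncated to the remaining budget (the pos adjustment above). A small sketch of the same clamping on a plain buffer; FixedLengthBody is a hypothetical stand-in.

#include <algorithm>
#include <cstdio>
#include <string>
#include <string_view>

// Sketch of Content-Length clamping: `declared` mirrors fixed_length and `sent`
// mirrors count_length; a write that would overshoot is truncated so the total
// body never exceeds the advertised length.
struct FixedLengthBody
{
    std::string wire;        // stands in for the socket
    size_t declared = 0;     // value promised in the Content-Length header
    size_t sent = 0;

    void write(std::string_view data)
    {
        if (sent >= declared || data.empty())
            return;
        size_t allowed = std::min(data.size(), declared - sent);
        wire.append(data.substr(0, allowed));
        sent += allowed;
    }
};

int main()
{
    FixedLengthBody body;
    body.declared = 5;
    body.write("hel");
    body.write("lo, world");   // clamped: only "lo" fits into the remaining 2 bytes
    std::printf("%s (%zu of %zu bytes)\n", body.wire.c_str(), body.sent, body.declared);
}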
+ std::shared_ptr send(); /// Sends the response headers to the client /// but do not finish headers with \r\n, @@ -34,7 +207,7 @@ public: /// /// Must not be called after send(), sendFile(), sendBuffer() /// or redirect() has been called. - std::pair, std::shared_ptr> beginSend(); /// TODO: use some WriteBuffer implementation here. + std::pair, std::shared_ptr> beginSend(); /// Sends the response header to the client, followed /// by the contents of the given buffer. @@ -58,13 +231,16 @@ public: /// Returns true if the response (header) has been sent. bool sent() const { return !!stream; } + Poco::Net::StreamSocket & getSocket() { return session.socket(); } + void attachRequest(HTTPServerRequest * request_) { request = request_; } private: Poco::Net::HTTPServerSession & session; HTTPServerRequest * request = nullptr; - std::shared_ptr stream; - std::shared_ptr header_stream; + ProfileEvents::Event write_event; + std::shared_ptr stream; + std::shared_ptr header_stream; }; } diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 1a12c09a8c7..a3e7c28c8dc 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -1,17 +1,16 @@ +#include "Common/StackTrace.h" #include - #include #include #include #include +#include +#include +#include namespace DB { -namespace ErrorCodes -{ -} - void WriteBufferFromHTTPServerResponse::startSendHeaders() { @@ -19,27 +18,33 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() { headers_started_sending = true; + if (response.getChunkedTransferEncoding()) + setChunked(); + if (add_cors_header) response.set("Access-Control-Allow-Origin", "*"); setResponseDefaultHeaders(response, keep_alive_timeout); - if (!is_http_method_head) - std::tie(response_header_ostr, response_body_ostr) = response.beginSend(); + std::stringstream header; //STYLE_CHECK_ALLOW_STD_STRING_STREAM + response.beginWrite(header); + auto header_str = header.str(); + socketSendBytes(header_str.data(), header_str.size()); } } void WriteBufferFromHTTPServerResponse::writeHeaderProgressImpl(const char * header_name) { - if (headers_finished_sending) + if (is_http_method_head || headers_finished_sending || !headers_started_sending) return; WriteBufferFromOwnString progress_string_writer; accumulated_progress.writeJSON(progress_string_writer); - if (response_header_ostr) - *response_header_ostr << header_name << progress_string_writer.str() << "\r\n" << std::flush; + socketSendBytes(header_name, strlen(header_name)); + socketSendBytes(progress_string_writer.str().data(), progress_string_writer.str().size()); + socketSendBytes("\r\n", 2); } void WriteBufferFromHTTPServerResponse::writeHeaderSummary() @@ -57,30 +62,30 @@ void WriteBufferFromHTTPServerResponse::writeExceptionCode() { if (headers_finished_sending || !exception_code) return; - if (response_header_ostr) - *response_header_ostr << "X-ClickHouse-Exception-Code: " << exception_code << "\r\n" << std::flush; + if (headers_started_sending) + { + socketSendBytes("X-ClickHouse-Exception-Code: ", sizeof("X-ClickHouse-Exception-Code: ") - 1); + auto str_code = std::to_string(exception_code); + socketSendBytes(str_code.data(), str_code.size()); + socketSendBytes("\r\n", 2); + } } void WriteBufferFromHTTPServerResponse::finishSendHeaders() { - if (!headers_finished_sending) - { - writeHeaderSummary(); - writeExceptionCode(); - headers_finished_sending = true; + if (headers_finished_sending) + return; - 
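Because the header section is now streamed straight to the socket, writeHeaderProgressImpl can append additional X-ClickHouse-Progress header lines at any point before finishSendHeaders() writes the terminating blank line. The sketch below shows that sequence; the JSON field names are meant to resemble the progress payload but should be read as illustrative rather than an exact specification.

#include <cstdio>
#include <string>

// Sketch of appending a progress header line while the header section is still
// open; the field names mirror the X-ClickHouse-Progress payload only loosely.
std::string progressHeaderLine(unsigned long long read_rows,
                               unsigned long long read_bytes,
                               unsigned long long total_rows_to_read)
{
    char buf[256];
    std::snprintf(buf, sizeof(buf),
                  "X-ClickHouse-Progress: {\"read_rows\":\"%llu\",\"read_bytes\":\"%llu\","
                  "\"total_rows_to_read\":\"%llu\"}\r\n",
                  read_rows, read_bytes, total_rows_to_read);
    return buf;
}

int main()
{
    std::string headers;
    headers += "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n";  // startSendHeaders()
    headers += progressHeaderLine(1000, 81920, 5000);                     // periodic progress update
    headers += progressHeaderLine(5000, 409600, 5000);                    // final summary
    headers += "\r\n";                                                    // finishSendHeaders()
    std::fputs(headers.c_str(), stdout);
}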
if (!is_http_method_head) - { - /// Send end of headers delimiter. - if (response_header_ostr) - *response_header_ostr << "\r\n" << std::flush; - } - else - { - if (!response_body_ostr) - response_body_ostr = response.send(); - } - } + if (!headers_started_sending) + startSendHeaders(); + + writeHeaderSummary(); + writeExceptionCode(); + + headers_finished_sending = true; + + /// Send end of headers delimiter. + socketSendBytes("\r\n", 2); } @@ -89,47 +94,19 @@ void WriteBufferFromHTTPServerResponse::nextImpl() if (!initialized) { std::lock_guard lock(mutex); - /// Initialize as early as possible since if the code throws, /// next() should not be called anymore. initialized = true; + if (compression_method != CompressionMethod::None) + response.set("Content-Encoding", toContentEncodingName(compression_method)); + startSendHeaders(); - - if (!out && !is_http_method_head) - { - if (compress) - { - auto content_encoding_name = toContentEncodingName(compression_method); - - *response_header_ostr << "Content-Encoding: " << content_encoding_name << "\r\n"; - } - - /// We reuse our buffer in "out" to avoid extra allocations and copies. - - if (compress) - out = wrapWriteBufferWithCompressionMethod( - std::make_unique(*response_body_ostr), - compress ? compression_method : CompressionMethod::None, - compression_level, - working_buffer.size(), - working_buffer.begin()); - else - out = std::make_unique( - *response_body_ostr, - working_buffer.size(), - working_buffer.begin()); - } - finishSendHeaders(); } - if (out) - { - out->buffer() = buffer(); - out->position() = position(); - out->next(); - } + if (!is_http_method_head) + HTTPWriteBuffer::nextImpl(); } @@ -137,14 +114,11 @@ WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, UInt64 keep_alive_timeout_, - bool compress_, - CompressionMethod compression_method_) - : BufferWithOwnMemory(DBMS_DEFAULT_BUFFER_SIZE) + const ProfileEvents::Event & write_event_) + : HTTPWriteBuffer(response_.getSocket(), write_event_) , response(response_) , is_http_method_head(is_http_method_head_) , keep_alive_timeout(keep_alive_timeout_) - , compress(compress_) - , compression_method(compression_method_) { } @@ -169,37 +143,43 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress) } } +void WriteBufferFromHTTPServerResponse::setExceptionCode(int exception_code_) +{ + std::lock_guard lock(mutex); + if (headers_started_sending) + exception_code = exception_code_; + else + response.set("X-ClickHouse-Exception-Code", toString(exception_code_)); +} + WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse() { - finalize(); + try + { + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } void WriteBufferFromHTTPServerResponse::finalizeImpl() { - try + if (!headers_finished_sending) { - next(); - if (out) - out->finalize(); - out.reset(); - /// Catch write-after-finalize bugs. - set(nullptr, 0); - } - catch (...) - { - /// Avoid calling WriteBufferFromOStream::next() from dtor - /// (via WriteBufferFromHTTPServerResponse::next()) - out.reset(); - throw; - } - - if (!offset()) - { - /// If no remaining data, just send headers. 
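setExceptionCode() now takes the lock and branches: if the header section has not started yet, the code is simply set as an ordinary X-ClickHouse-Exception-Code response header; if headers are already being streamed, it is remembered and written out as an extra header line just before the section is finished. A small sketch of those two paths; ResponseSketch and its members are hypothetical.

#include <cstdio>
#include <map>
#include <string>

// Sketch of the two setExceptionCode() paths: before streaming starts the code
// becomes an ordinary header; once headers are on the wire it is remembered and
// flushed as an extra line right before the terminating "\r\n".
struct ResponseSketch
{
    std::map<std::string, std::string> headers;  // mutable only until streaming starts
    bool headers_started = false;
    int pending_exception_code = 0;

    void setExceptionCode(int code)
    {
        if (headers_started)
            pending_exception_code = code;
        else
            headers["X-ClickHouse-Exception-Code"] = std::to_string(code);
    }

    void finishHeaders(std::string & wire)
    {
        if (pending_exception_code)
            wire += "X-ClickHouse-Exception-Code: " + std::to_string(pending_exception_code) + "\r\n";
        wire += "\r\n";
    }
};

int main()
{
    ResponseSketch response;
    response.headers_started = true;     // progress headers already on the wire
    response.setExceptionCode(241);      // e.g. MEMORY_LIMIT_EXCEEDED

    std::string wire;
    response.finishHeaders(wire);
    std::fputs(wire.c_str(), stdout);
}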
std::lock_guard lock(mutex); + /// If no body data just send header startSendHeaders(); + + if (!initialized && offset() && compression_method != CompressionMethod::None) + socketSendStr("Content-Encoding: " + toContentEncodingName(compression_method) + "\r\n"); + finishSendHeaders(); } + + if (!is_http_method_head) + HTTPWriteBuffer::finalizeImpl(); } diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 38345f27952..a3952b7c553 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -5,8 +5,8 @@ #include #include #include -#include #include +#include #include #include @@ -17,48 +17,26 @@ namespace DB { -/// The difference from WriteBufferFromOStream is that this buffer gets the underlying std::ostream -/// (using response.send()) only after data is flushed for the first time. This is needed in HTTP -/// servers to change some HTTP headers (e.g. response code) before any data is sent to the client -/// (headers can't be changed after response.send() is called). -/// -/// In short, it allows delaying the call to response.send(). -/// -/// Additionally, supports HTTP response compression (in this case corresponding Content-Encoding -/// header will be set). +/// Postpone sending HTTP header until first data is flushed. This is needed in HTTP servers +/// to change some HTTP headers (e.g. response code) before any data is sent to the client. /// /// Also this class write and flush special X-ClickHouse-Progress HTTP headers /// if no data was sent at the time of progress notification. /// This allows to implement progress bar in HTTP clients. -class WriteBufferFromHTTPServerResponse final : public BufferWithOwnMemory +class WriteBufferFromHTTPServerResponse final : public HTTPWriteBuffer { public: WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, UInt64 keep_alive_timeout_, - bool compress_ = false, /// If true - set Content-Encoding header and compress the result. - CompressionMethod compression_method_ = CompressionMethod::None); + const ProfileEvents::Event & write_event_ = ProfileEvents::end()); ~WriteBufferFromHTTPServerResponse() override; /// Writes progress in repeating HTTP headers. void onProgress(const Progress & progress); - /// Turn compression on or off. - /// The setting has any effect only if HTTP headers haven't been sent yet. - void setCompression(bool enable_compression) - { - compress = enable_compression; - } - - /// Set compression level if the compression is turned on. - /// The setting has any effect only if HTTP headers haven't been sent yet. - void setCompressionLevel(int level) - { - compression_level = level; - } - /// Turn CORS on or off. /// The setting has any effect only if HTTP headers haven't been sent yet. void addHeaderCORS(bool enable_cors) @@ -75,7 +53,13 @@ public: send_progress_interval_ms = send_progress_interval_ms_; } - void setExceptionCode(int exception_code_) { exception_code = exception_code_; } + /// Content-Encoding header will be set on first data package + void setCompressionMethodHeader(const CompressionMethod & compression_method_) + { + compression_method = compression_method_; + } + + void setExceptionCode(int exception_code_); private: /// Send at least HTTP headers if no data has been sent yet. 
@@ -108,14 +92,7 @@ private: bool is_http_method_head; bool add_cors_header = false; size_t keep_alive_timeout = 0; - bool compress = false; - CompressionMethod compression_method; - int compression_level = 1; - std::shared_ptr response_body_ostr; - std::shared_ptr response_header_ostr; - - std::unique_ptr out; bool initialized = false; bool headers_started_sending = false; @@ -126,6 +103,8 @@ private: size_t send_progress_interval_ms = 100; Stopwatch progress_watch; + CompressionMethod compression_method = CompressionMethod::None; + int exception_code = 0; std::mutex mutex; /// progress callback could be called from different threads. diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index f53c0094ef7..bdc8e7d59c9 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -46,18 +47,11 @@ #include #include +#include #include +#include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - #if USE_SSL #include #endif @@ -301,7 +295,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output) std::vector write_buffers; ConcatReadBuffer::Buffers read_buffers; - auto * cascade_buffer = typeid_cast(used_output.out_maybe_delayed_and_compressed.get()); + auto * cascade_buffer = typeid_cast(used_output.out_maybe_delayed_and_compressed); if (!cascade_buffer) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected CascadeWriteBuffer"); @@ -553,7 +547,8 @@ void HTTPHandler::processQuery( HTMLForm & params, HTTPServerResponse & response, Output & used_output, - std::optional & query_scope) + std::optional & query_scope, + const ProfileEvents::Event & write_event) { using namespace Poco::Net; @@ -564,6 +559,9 @@ void HTTPHandler::processQuery( /// The user could specify session identifier and session timeout. /// It allows to modify settings, create temporary tables and reuse them in subsequent requests. + + SCOPE_EXIT({ session->releaseSessionID(); }); + String session_id; std::chrono::steady_clock::duration session_timeout; bool session_is_set = params.has("session_id"); @@ -616,15 +614,35 @@ void HTTPHandler::processQuery( size_t buffer_size_http = DBMS_DEFAULT_BUFFER_SIZE; size_t buffer_size_memory = (buffer_size_total > buffer_size_http) ? 
buffer_size_total : 0; - used_output.out = std::make_shared( - response, - request.getMethod() == HTTPRequest::HTTP_HEAD, - context->getServerSettings().keep_alive_timeout.totalSeconds(), - client_supports_http_compression, - http_response_compression_method); + bool enable_http_compression = params.getParsed("enable_http_compression", context->getSettingsRef().enable_http_compression); + Int64 http_zlib_compression_level = params.getParsed("http_zlib_compression_level", context->getSettingsRef().http_zlib_compression_level); + + used_output.out_holder = + std::make_shared( + response, + request.getMethod() == HTTPRequest::HTTP_HEAD, + context->getServerSettings().keep_alive_timeout.totalSeconds(), + write_event); + used_output.out = used_output.out_holder; + used_output.out_maybe_compressed = used_output.out_holder; + + if (client_supports_http_compression && enable_http_compression) + { + used_output.out_holder->setCompressionMethodHeader(http_response_compression_method); + used_output.wrap_compressed_holder = + wrapWriteBufferWithCompressionMethod( + used_output.out_holder.get(), + http_response_compression_method, + static_cast(http_zlib_compression_level), + 0, DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0, false); + used_output.out = used_output.wrap_compressed_holder; + } if (internal_compression) - used_output.out_maybe_compressed = std::make_shared(*used_output.out); + { + used_output.out_compressed_holder = std::make_shared(*used_output.out); + used_output.out_maybe_compressed = used_output.out_compressed_holder; + } else used_output.out_maybe_compressed = used_output.out; @@ -664,12 +682,12 @@ void HTTPHandler::processQuery( cascade_buffer2.emplace_back(push_memory_buffer_and_continue); } - used_output.out_maybe_delayed_and_compressed = std::make_shared( - std::move(cascade_buffer1), std::move(cascade_buffer2)); + used_output.out_delayed_and_compressed_holder = std::make_unique(std::move(cascade_buffer1), std::move(cascade_buffer2)); + used_output.out_maybe_delayed_and_compressed = used_output.out_delayed_and_compressed_holder.get(); } else { - used_output.out_maybe_delayed_and_compressed = used_output.out_maybe_compressed; + used_output.out_maybe_delayed_and_compressed = used_output.out_maybe_compressed.get(); } /// Request body can be compressed using algorithm specified in the Content-Encoding header. @@ -798,14 +816,8 @@ void HTTPHandler::processQuery( const auto & query = getQuery(request, params, context); std::unique_ptr in_param = std::make_unique(query); - /// HTTP response compression is turned on only if the client signalled that they support it - /// (using Accept-Encoding header) and 'enable_http_compression' setting is turned on. - used_output.out->setCompression(client_supports_http_compression && settings.enable_http_compression); - if (client_supports_http_compression) - used_output.out->setCompressionLevel(static_cast(settings.http_zlib_compression_level)); - - used_output.out->setSendProgress(settings.send_progress_in_http_headers); - used_output.out->setSendProgressInterval(settings.http_headers_progress_interval_ms); + used_output.out_holder->setSendProgress(settings.send_progress_in_http_headers); + used_output.out_holder->setSendProgressInterval(settings.http_headers_progress_interval_ms); /// If 'http_native_compression_disable_checksumming_on_decompress' setting is turned on, /// checksums of client data compressed with internal algorithm are not checked. 
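Response compression is now decided once in processQuery(): the client must advertise a supported method in Accept-Encoding and enable_http_compression must be on (overridable per request as a URL parameter, as the getParsed calls above show); the chosen method is recorded with setCompressionMethodHeader() so Content-Encoding goes out with the first data packet, and the holder buffer is wrapped via wrapWriteBufferWithCompressionMethod. The sketch below is a simplified stand-in for the negotiation step only, not the actual chooseHTTPCompressionMethod implementation: it ignores quality values and just scans for a few known tokens.

#include <iostream>
#include <string>
#include <string_view>
#include <vector>

// Simplified stand-in for choosing an HTTP response compression method from the
// client's Accept-Encoding header; the real server supports more encodings and
// honours quality values.
std::string chooseEncoding(std::string_view accept_encoding)
{
    const std::vector<std::string_view> preferred{"zstd", "br", "gzip", "deflate"};
    for (auto candidate : preferred)
        if (accept_encoding.find(candidate) != std::string_view::npos)
            return std::string(candidate);
    return "";   // no compression
}

int main()
{
    bool enable_http_compression = true;                 // server setting / URL parameter
    std::string encoding = chooseEncoding("gzip, deflate, br");

    if (enable_http_compression && !encoding.empty())
        std::cout << "Content-Encoding: " << encoding << "\r\n";
    else
        std::cout << "(sending the response uncompressed)\n";
}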
@@ -816,7 +828,7 @@ void HTTPHandler::processQuery( /// Note that whether the header is added is determined by the settings, and we can only get the user settings after authentication. /// Once the authentication fails, the header can't be added. if (settings.add_http_cors_header && !request.get("Origin", "").empty() && !config.has("http_options_response")) - used_output.out->addHeaderCORS(true); + used_output.out_holder->addHeaderCORS(true); auto append_callback = [my_context = context] (ProgressCallback callback) { @@ -835,7 +847,7 @@ void HTTPHandler::processQuery( /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` append_callback([&used_output](const Progress & progress) { - used_output.out->onProgress(progress); + used_output.out_holder->onProgress(progress); }); if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) @@ -888,6 +900,8 @@ void HTTPHandler::processQuery( {}, handle_exception_in_output_format); + session->releaseSessionID(); + if (used_output.hasDelayed()) { /// TODO: set Content-Length if possible @@ -902,10 +916,8 @@ void HTTPHandler::trySendExceptionToClient( const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) try { - /// In case data has already been sent, like progress headers, try using the output buffer to - /// set the exception code since it will be able to append it if it hasn't finished writing headers - if (response.sent() && used_output.out) - used_output.out->setExceptionCode(exception_code); + if (used_output.out_holder) + used_output.out_holder->setExceptionCode(exception_code); else response.set("X-ClickHouse-Exception-Code", toString(exception_code)); @@ -930,10 +942,10 @@ try response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); } - if (!response.sent() && !used_output.out_maybe_compressed && !used_output.exception_is_written) + if (!used_output.out_holder && !used_output.exception_is_written) { /// If nothing was sent yet and we don't even know if we must compress the response. - *response.send() << s << std::endl; + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT).writeln(s); } else if (used_output.out_maybe_compressed) { @@ -943,7 +955,8 @@ try /// do not call finalize here for CascadeWriteBuffer used_output.out_maybe_delayed_and_compressed, /// exception is written into used_output.out_maybe_compressed later /// HTTPHandler::trySendExceptionToClient is called with exception context, it is Ok to destroy buffers - used_output.out_maybe_delayed_and_compressed.reset(); + used_output.out_delayed_and_compressed_holder.reset(); + used_output.out_maybe_delayed_and_compressed = nullptr; } if (!used_output.exception_is_written) @@ -953,12 +966,12 @@ try /// Also HTTP code 200 could have already been sent. /// If buffer has data, and that data wasn't sent yet, then no need to send that data - bool data_sent = used_output.out->count() != used_output.out->offset(); + bool data_sent = used_output.out_holder->count() != used_output.out_holder->offset(); if (!data_sent) { used_output.out_maybe_compressed->position() = used_output.out_maybe_compressed->buffer().begin(); - used_output.out->position() = used_output.out->buffer().begin(); + used_output.out_holder->position() = used_output.out_holder->buffer().begin(); } writeString(s, *used_output.out_maybe_compressed); @@ -989,7 +1002,7 @@ catch (...) 
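When an exception has to be reported and no body bytes have reached the client yet (out_holder->count() == out_holder->offset()), the buffers are rewound to their beginnings so the buffered-but-unsent partial result is discarded and replaced by the exception text. A sketch of that rewind-and-replace idea on a trivial buffer; OutBuffer is a hypothetical stand-in and the strings are only examples.

#include <cstdio>
#include <string>

// Sketch of the "rewind if nothing was sent yet" step used when reporting an
// exception: data still sitting in the output buffer (nothing has hit the
// socket) can simply be dropped and replaced by the error message.
struct OutBuffer
{
    std::string pending;   // bytes buffered but not yet flushed to the socket
    size_t flushed = 0;    // bytes already on the wire

    void rewindIfUnsent()
    {
        if (flushed == 0)
            pending.clear();   // analogue of position() = buffer().begin()
    }
};

int main()
{
    OutBuffer out;
    out.pending = "{\"partial\": \"result that will never be valid\"";

    out.rewindIfUnsent();                                   // nothing flushed -> safe to discard
    out.pending = "Code: 241. DB::Exception: Memory limit exceeded\n";

    std::printf("%s", out.pending.c_str());
}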
} -void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("HTTPHandler"); ThreadStatus thread_status; @@ -1078,7 +1091,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse "is no Content-Length header for POST request"); } - processQuery(request, params, response, used_output, query_scope); + processQuery(request, params, response, used_output, query_scope, write_event); if (request_credentials) LOG_DEBUG(log, "Authentication in progress..."); else diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index 6ec56cb09c1..026bda43d14 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -6,15 +6,9 @@ #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include +#include +#include namespace CurrentMetrics { @@ -40,7 +34,7 @@ public: HTTPHandler(IServer & server_, const std::string & name, const std::optional & content_type_override_); virtual ~HTTPHandler() override; - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; /// This method is called right before the query execution. virtual void customizeContext(HTTPServerRequest & /* request */, ContextMutablePtr /* context */, ReadBuffer & /* body */) {} @@ -61,11 +55,22 @@ private: * WriteBufferFromHTTPServerResponse out */ - std::shared_ptr out; - /// Points to 'out' or to CompressedWriteBuffer(*out), depending on settings. + /// Holds original response buffer + std::shared_ptr out_holder; + /// If HTTP compression is enabled holds compression wrapper over original response buffer + std::shared_ptr wrap_compressed_holder; + /// Points either to out_holder or to wrap_compressed_holder + std::shared_ptr out; + + /// If internal compression is enabled holds compression wrapper over out buffer + std::shared_ptr out_compressed_holder; + /// Points to 'out' or to CompressedWriteBuffer(*out) std::shared_ptr out_maybe_compressed; - /// Points to 'out' or to CompressedWriteBuffer(*out) or to CascadeWriteBuffer. - std::shared_ptr out_maybe_delayed_and_compressed; + + /// If output should be delayed holds cascade buffer + std::unique_ptr out_delayed_and_compressed_holder; + /// Points to out_maybe_compressed or to CascadeWriteBuffer. 
+ WriteBuffer * out_maybe_delayed_and_compressed = nullptr; bool finalized = false; @@ -73,7 +78,7 @@ private: inline bool hasDelayed() const { - return out_maybe_delayed_and_compressed != out_maybe_compressed; + return out_maybe_delayed_and_compressed != out_maybe_compressed.get(); } inline void finalize() @@ -82,11 +87,9 @@ private: return; finalized = true; - if (out_maybe_delayed_and_compressed) - out_maybe_delayed_and_compressed->finalize(); if (out_maybe_compressed) out_maybe_compressed->finalize(); - if (out) + else if (out) out->finalize(); } @@ -135,7 +138,8 @@ private: HTMLForm & params, HTTPServerResponse & response, Output & used_output, - std::optional & query_scope); + std::optional & query_scope, + const ProfileEvents::Event & write_event); void trySendExceptionToClient( const std::string & s, diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 94c1ad5b6f2..1f5db283323 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -4,21 +4,13 @@ #include #include #include +#include #include #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 53773a83b40..c41d68bab02 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -77,7 +77,7 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer } -void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("IntersrvHandler"); ThreadStatus thread_status; @@ -89,7 +89,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe Output used_output; const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); used_output.out = std::make_shared( - response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); auto write_response = [&](const std::string & message) { diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h index da5b286b9e5..66042ad3d1d 100644 --- a/src/Server/InterserverIOHTTPHandler.h +++ b/src/Server/InterserverIOHTTPHandler.h @@ -30,7 +30,7 @@ public: { } - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: struct Output diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index ed972055aee..de6edd199d7 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -19,7 +19,7 @@ namespace DB { -void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTPServerResponse & response) +void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { try { @@ -58,7 +58,7 @@ void 
KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP if (!response.sent()) { /// We have not sent anything yet and we don't even know if we need to compress response. - *response.send() << getCurrentExceptionMessage(false) << std::endl; + *response.send() << getCurrentExceptionMessage(false) << '\n'; } } catch (...) diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h index 00b51b886f9..a16aa9f8021 100644 --- a/src/Server/KeeperReadinessHandler.h +++ b/src/Server/KeeperReadinessHandler.h @@ -22,7 +22,7 @@ public: { } - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; HTTPRequestHandlerFactoryPtr diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 10b520ca97a..cb912e000e5 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include #include +#include #if USE_SSL # include @@ -70,13 +70,17 @@ MySQLHandler::MySQLHandler( IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, - bool ssl_enabled, uint32_t connection_id_) + bool ssl_enabled, uint32_t connection_id_, + const ProfileEvents::Event & read_event_, + const ProfileEvents::Event & write_event_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) , log(&Poco::Logger::get("MySQLHandler")) , connection_id(connection_id_) , auth_plugin(new MySQLProtocol::Authentication::Native41()) + , read_event(read_event_) + , write_event(write_event_) { server_capabilities = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; if (ssl_enabled) @@ -98,8 +102,8 @@ void MySQLHandler::run() session->setClientConnectionId(connection_id); - in = std::make_shared(socket()); - out = std::make_shared(socket()); + in = std::make_shared(socket(), read_event); + out = std::make_shared(socket(), write_event); packet_endpoint = std::make_shared(*in, *out, sequence_id); try @@ -489,8 +493,10 @@ MySQLHandlerSSL::MySQLHandlerSSL( bool ssl_enabled, uint32_t connection_id_, RSA & public_key_, - RSA & private_key_) - : MySQLHandler(server_, tcp_server_, socket_, ssl_enabled, connection_id_) + RSA & private_key_, + const ProfileEvents::Event & read_event_, + const ProfileEvents::Event & write_event_) + : MySQLHandler(server_, tcp_server_, socket_, ssl_enabled, connection_id_, read_event_, write_event_) , public_key(public_key_) , private_key(private_key_) {} @@ -524,16 +530,18 @@ void MySQLHandlerSSL::finishHandshakeSSL( static bool isFederatedServerSetupSetCommand(const String & query) { - static const std::regex expr{ + re2::RE2::Options regexp_options; + regexp_options.set_case_sensitive(false); + static const re2::RE2 expr( "(^(SET NAMES(.*)))" "|(^(SET character_set_results(.*)))" "|(^(SET FOREIGN_KEY_CHECKS(.*)))" "|(^(SET AUTOCOMMIT(.*)))" "|(^(SET sql_mode(.*)))" "|(^(SET @@(.*)))" - "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))" - , std::regex::icase}; - return 1 == std::regex_match(query, expr); + "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))", regexp_options); + assert(expr.ok()); + return re2::RE2::FullMatch(query, expr); } /// Replace "[query(such as SHOW VARIABLES...)]" into "". 
@@ -592,8 +600,8 @@ static String killConnectionIdReplacementQuery(const String & query) if (query.size() > prefix.size()) { String suffix = query.data() + prefix.length(); - static const std::regex expr{"^[0-9]"}; - if (std::regex_match(suffix, expr)) + static const re2::RE2 expr("^[0-9]"); + if (re2::RE2::FullMatch(suffix, expr)) { String replacement = fmt::format("KILL QUERY WHERE query_id LIKE 'mysql:{}:%'", suffix); return replacement; diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 194b18bdc39..36d63ebca84 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -42,7 +42,9 @@ public: TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, - uint32_t connection_id_); + uint32_t connection_id_, + const ProfileEvents::Event & read_event_ = ProfileEvents::end(), + const ProfileEvents::Event & write_event_ = ProfileEvents::end()); void run() final; @@ -102,6 +104,9 @@ protected: std::shared_ptr in; std::shared_ptr out; bool secure_connection = false; + + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; }; #if USE_SSL @@ -115,7 +120,9 @@ public: bool ssl_enabled, uint32_t connection_id_, RSA & public_key_, - RSA & private_key_); + RSA & private_key_, + const ProfileEvents::Event & read_event_ = ProfileEvents::end(), + const ProfileEvents::Event & write_event_ = ProfileEvents::end()); private: void authPluginSSL() override; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index f74f57926f9..79234c647aa 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -21,9 +21,11 @@ namespace ErrorCodes extern const int OPENSSL_ERROR; } -MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_) +MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : server(server_) , log(&Poco::Logger::get("MySQLHandlerFactory")) + , read_event(read_event_) + , write_event(write_event_) { #if USE_SSL try diff --git a/src/Server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h index fa4ce93f765..307ee3b2f0d 100644 --- a/src/Server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "config.h" @@ -37,8 +38,11 @@ private: #endif std::atomic last_connection_id = 0; + + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; public: - explicit MySQLHandlerFactory(IServer & server_); + explicit MySQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()); void readRSAKeys(); diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 5b1db508551..38f56921c89 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -5,7 +5,7 @@ namespace DB { -void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { try { diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h index 1cbfcd57f8f..a484d237771 100644 --- a/src/Server/NotFoundHandler.h +++ b/src/Server/NotFoundHandler.h @@ -10,7 +10,7 @@ class NotFoundHandler : public HTTPRequestHandler { public: NotFoundHandler(std::vector hints_) : hints(std::move(hints_)) {} - void 
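The std::regex uses in MySQLHandler.cpp are replaced with RE2: isFederatedServerSetupSetCommand compiles a single case-insensitive pattern (case-insensitivity supplied through RE2::Options rather than an inline flag), and killConnectionIdReplacementQuery uses RE2::FullMatch for its check. A minimal sketch of the same construction with a shortened, illustrative pattern; it needs to be linked against re2.

#include <cassert>
#include <iostream>
#include <string>
#include <re2/re2.h>

// Sketch of the std::regex -> RE2 switch: the pattern is compiled once into a
// function-local static, with case-insensitivity coming from RE2::Options.
// The pattern below is a shortened, illustrative subset. Build with -lre2.
static bool looksLikeFederatedSetup(const std::string & query)
{
    re2::RE2::Options options;
    options.set_case_sensitive(false);
    static const re2::RE2 expr("(^(SET NAMES(.*)))|(^(SET AUTOCOMMIT(.*)))|(^(SET sql_mode(.*)))", options);
    assert(expr.ok());
    return re2::RE2::FullMatch(query, expr);
}

int main()
{
    std::cout << looksLikeFederatedSetup("set names utf8mb4") << '\n';   // 1: matches case-insensitively
    std::cout << looksLikeFederatedSetup("SELECT 1") << '\n';            // 0
}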
handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: std::vector hints; }; diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index eeb3784c1df..c62dc8109ea 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -32,12 +32,16 @@ PostgreSQLHandler::PostgreSQLHandler( TCPServer & tcp_server_, bool ssl_enabled_, Int32 connection_id_, - std::vector> & auth_methods_) + std::vector> & auth_methods_, + const ProfileEvents::Event & read_event_, + const ProfileEvents::Event & write_event_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) , ssl_enabled(ssl_enabled_) , connection_id(connection_id_) + , read_event(read_event_) + , write_event(write_event_) , authentication_manager(auth_methods_) { changeIO(socket()); @@ -45,8 +49,8 @@ PostgreSQLHandler::PostgreSQLHandler( void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket) { - in = std::make_shared(socket); - out = std::make_shared(socket); + in = std::make_shared(socket, read_event); + out = std::make_shared(socket, write_event); message_transport = std::make_shared(in.get(), out.get()); } diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index f20af3df02c..57b91a0ad04 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -33,7 +33,9 @@ public: TCPServer & tcp_server_, bool ssl_enabled_, Int32 connection_id_, - std::vector> & auth_methods_); + std::vector> & auth_methods_, + const ProfileEvents::Event & read_event_ = ProfileEvents::end(), + const ProfileEvents::Event & write_event_ = ProfileEvents::end()); void run() final; @@ -51,6 +53,9 @@ private: std::shared_ptr out; std::shared_ptr message_transport; + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; + #if USE_SSL std::shared_ptr ss; #endif diff --git a/src/Server/PostgreSQLHandlerFactory.cpp b/src/Server/PostgreSQLHandlerFactory.cpp index 6f2124861e7..096bbbdcda9 100644 --- a/src/Server/PostgreSQLHandlerFactory.cpp +++ b/src/Server/PostgreSQLHandlerFactory.cpp @@ -5,9 +5,11 @@ namespace DB { -PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_) +PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : server(server_) , log(&Poco::Logger::get("PostgreSQLHandlerFactory")) + , read_event(read_event_) + , write_event(write_event_) { auth_methods = { @@ -20,7 +22,7 @@ Poco::Net::TCPServerConnection * PostgreSQLHandlerFactory::createConnection(cons { Int32 connection_id = last_connection_id++; LOG_TRACE(log, "PostgreSQL connection. Id: {}. 
Address: {}", connection_id, socket.peerAddress().toString()); - return new PostgreSQLHandler(socket, server, tcp_server, ssl_enabled, connection_id, auth_methods); + return new PostgreSQLHandler(socket, server, tcp_server, ssl_enabled, connection_id, auth_methods, read_event, write_event); } } diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index 35046325386..e5f762fca6d 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -15,6 +15,8 @@ class PostgreSQLHandlerFactory : public TCPServerConnectionFactory private: IServer & server; Poco::Logger * log; + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; #if USE_SSL bool ssl_enabled = true; @@ -26,7 +28,7 @@ private: std::vector> auth_methods; public: - explicit PostgreSQLHandlerFactory(IServer & server_); + explicit PostgreSQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()); Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & server) override; }; diff --git a/src/Server/PrometheusMetricsWriter.cpp b/src/Server/PrometheusMetricsWriter.cpp index 662a15a522a..3d09c2165e5 100644 --- a/src/Server/PrometheusMetricsWriter.cpp +++ b/src/Server/PrometheusMetricsWriter.cpp @@ -1,10 +1,9 @@ #include "PrometheusMetricsWriter.h" #include -#include /// TODO: this library is harmful. +#include #include - namespace { @@ -26,9 +25,11 @@ void writeOutLine(DB::WriteBuffer & wb, T && val, TArgs &&... args) /// Returns false if name is not valid bool replaceInvalidChars(std::string & metric_name) { - /// dirty solution - metric_name = std::regex_replace(metric_name, std::regex("[^a-zA-Z0-9_:]"), "_"); - metric_name = std::regex_replace(metric_name, std::regex("^[^a-zA-Z]*"), ""); + /// dirty solution: + static const re2::RE2 regexp1("[^a-zA-Z0-9_:]"); + static const re2::RE2 regexp2("^[^a-zA-Z]*"); + re2::RE2::GlobalReplace(&metric_name, regexp1, "_"); + re2::RE2::GlobalReplace(&metric_name, regexp2, ""); return !metric_name.empty(); } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 127ed843cb6..12caad5eea1 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -13,7 +13,7 @@ namespace DB { -void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { try { @@ -27,7 +27,7 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); - WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); try { metrics_writer.write(wb); @@ -44,8 +44,8 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe } } -HTTPRequestHandlerFactoryPtr -createPrometheusHandlerFactory(IServer & server, +HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( + IServer & server, const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics, const std::string & config_prefix) diff 
--git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h index 1fb3d9f0f59..9ec54cc2e4e 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -16,13 +16,13 @@ private: const PrometheusMetricsWriter & metrics_writer; public: - explicit PrometheusRequestHandler(IServer & server_, const PrometheusMetricsWriter & metrics_writer_) + PrometheusRequestHandler(IServer & server_, const PrometheusMetricsWriter & metrics_writer_) : server(server_) , metrics_writer(metrics_writer_) { } - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; } diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index c30c3ebaa77..07f3b67b6a7 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -22,7 +22,7 @@ ReplicasStatusHandler::ReplicasStatusHandler(IServer & server) : WithContext(ser { } -void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { try { @@ -113,7 +113,7 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe if (!response.sent()) { /// We have not sent anything yet and we don't even know if we need to compress response. - *response.send() << getCurrentExceptionMessage(false) << std::endl; + *response.send() << getCurrentExceptionMessage(false) << '\n'; } } catch (...) diff --git a/src/Server/ReplicasStatusHandler.h b/src/Server/ReplicasStatusHandler.h index 1a5388aa2ab..08fd757b0d6 100644 --- a/src/Server/ReplicasStatusHandler.h +++ b/src/Server/ReplicasStatusHandler.h @@ -14,7 +14,7 @@ class ReplicasStatusHandler : public HTTPRequestHandler, WithContext public: explicit ReplicasStatusHandler(IServer & server_); - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 34cb5d2d169..67bf3875de4 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -33,9 +33,11 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } -static inline WriteBufferPtr +static inline std::unique_ptr responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, UInt64 keep_alive_timeout) { + auto buf = std::unique_ptr(new WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout)); + /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). 
String http_response_compression_methods = request.get("Accept-Encoding", ""); CompressionMethod http_response_compression_method = CompressionMethod::None; @@ -43,14 +45,11 @@ responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, if (!http_response_compression_methods.empty()) http_response_compression_method = chooseHTTPCompressionMethod(http_response_compression_methods); - bool client_supports_http_compression = http_response_compression_method != CompressionMethod::None; + if (http_response_compression_method == CompressionMethod::None) + return buf; - return std::make_shared( - response, - request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, - keep_alive_timeout, - client_supports_http_compression, - http_response_compression_method); + response.set("Content-Encoding", toContentEncodingName(http_response_compression_method)); + return wrapWriteBufferWithCompressionMethod(std::move(buf), http_response_compression_method, 1); } static inline void trySendExceptionToClient( @@ -69,7 +68,7 @@ static inline void trySendExceptionToClient( response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - *response.send() << s << std::endl; + *response.send() << s << '\n'; else { if (out.count() != out.offset()) @@ -88,10 +87,10 @@ static inline void trySendExceptionToClient( } } -void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); - const auto & out = responseWriteBuffer(request, response, keep_alive_timeout); + auto out = responseWriteBuffer(request, response, keep_alive_timeout); try { diff --git a/src/Server/StaticRequestHandler.h b/src/Server/StaticRequestHandler.h index df9374d4409..38d774bb0aa 100644 --- a/src/Server/StaticRequestHandler.h +++ b/src/Server/StaticRequestHandler.h @@ -29,7 +29,7 @@ public: void writeResponse(WriteBuffer & out); - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a563e0e0004..fa7206eeaac 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -184,23 +184,27 @@ void validateClientInfo(const ClientInfo & session_client_info, const ClientInfo namespace DB { -TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) +TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) , parse_proxy_protocol(parse_proxy_protocol_) , log(&Poco::Logger::get("TCPHandler")) + , read_event(read_event_) + , write_event(write_event_) , server_display_name(std::move(server_display_name_)) { } -TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string 
server_display_name_) +TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) , log(&Poco::Logger::get("TCPHandler")) , forwarded_for(stack_data.forwarded_for) , certificate(stack_data.certificate) + , read_event(read_event_) + , write_event(write_event_) , default_database(stack_data.default_database) , server_display_name(std::move(server_display_name_)) { @@ -233,8 +237,8 @@ void TCPHandler::runImpl() socket().setSendTimeout(send_timeout); socket().setNoDelay(true); - in = std::make_shared(socket()); - out = std::make_shared(socket()); + in = std::make_shared(socket(), read_event); + out = std::make_shared(socket(), write_event); /// Support for PROXY protocol if (parse_proxy_protocol && !receiveProxyHeader()) @@ -2023,7 +2027,7 @@ void TCPHandler::initBlockOutput(const Block & block) if (state.compression == Protocol::Compression::Enable) { - CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs, query_settings.enable_deflate_qpl_codec); + CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs, query_settings.enable_deflate_qpl_codec, query_settings.enable_zstd_qat_codec); state.maybe_compressed_out = std::make_shared( *out, CompressionCodecFactory::instance().get(method, level)); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 45c10b1c27d..4eb84ee5eee 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -147,8 +147,8 @@ public: * because it allows to check the IP ranges of the trusted proxy. * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP. */ - TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_); - TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_); + TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()); + TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()); ~TCPHandler() override; void run() override; @@ -191,6 +191,9 @@ private: std::shared_ptr in; std::shared_ptr out; + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; + /// Time after the last check to stop the request and send the progress. 
Stopwatch after_check_cancelled; Stopwatch after_send_progress; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index fde04c6e0ab..3eb032f4250 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -21,6 +21,9 @@ private: Poco::Logger * log; std::string server_display_name; + ProfileEvents::Event read_event; + ProfileEvents::Event write_event; + class DummyTCPHandler : public Poco::Net::TCPServerConnection { public: @@ -33,9 +36,11 @@ public: * and set the information about forwarded address accordingly. * See https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt */ - TCPHandlerFactory(IServer & server_, bool secure_, bool parse_proxy_protocol_) + TCPHandlerFactory(IServer & server_, bool secure_, bool parse_proxy_protocol_, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()) : server(server_), parse_proxy_protocol(parse_proxy_protocol_) , log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory")) + , read_event(read_event_) + , write_event(write_event_) { server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } @@ -45,8 +50,7 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - - return new TCPHandler(server, tcp_server, socket, parse_proxy_protocol, server_display_name); + return new TCPHandler(server, tcp_server, socket, parse_proxy_protocol, server_display_name, read_event, write_event); } catch (const Poco::Net::NetException &) { @@ -60,8 +64,7 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - - return new TCPHandler(server, tcp_server, socket, stack_data, server_display_name); + return new TCPHandler(server, tcp_server, socket, stack_data, server_display_name, read_event, write_event); } catch (const Poco::Net::NetException &) { diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index ad48b38b91a..ac7a3bfccf3 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -1,20 +1,13 @@ #include "WebUIRequestHandler.h" #include "IServer.h" +#include #include #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include #include @@ -36,7 +29,7 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_) } -void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) +void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); @@ -50,7 +43,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR if (request.getURI().starts_with("/play")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << std::string_view(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); } else if (request.getURI().starts_with("/dashboard")) { @@ -66,17 +59,17 @@ void 
WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR static re2::RE2 uplot_url = R"(https://[^\s"'`]+u[Pp]lot[^\s"'`]*\.js)"; RE2::Replace(&html, uplot_url, "/js/uplot.js"); - *response.send() << html; + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(html); } else if (request.getURI().starts_with("/binary")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << std::string_view(reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize); } else if (request.getURI() == "/js/uplot.js") { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << std::string_view(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } else { diff --git a/src/Server/WebUIRequestHandler.h b/src/Server/WebUIRequestHandler.h index 09fe62d41c3..c52946e2089 100644 --- a/src/Server/WebUIRequestHandler.h +++ b/src/Server/WebUIRequestHandler.h @@ -16,7 +16,7 @@ private: public: WebUIRequestHandler(IServer & server_); - void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; } diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 9fc785373b3..1fb53475801 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -439,7 +439,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) column.comment = *comment; if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true, true); column.ttl = ttl; @@ -504,7 +504,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) else { if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? 
data_type : column.type, false, true, true, true); if (comment) column.comment = *comment; @@ -1249,7 +1249,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const "this column name is reserved for _block_number persisting feature", backQuote(column_name)); if (command.codec) - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec); all_columns.add(ColumnDescription(column_name, command.data_type)); } @@ -1274,7 +1274,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (all_columns.hasAlias(column_name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec); } auto column_default = all_columns.getDefault(column_name); if (column_default) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 697350faf09..1712b984596 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -53,6 +53,16 @@ ColumnDescription::ColumnDescription(String name_, DataTypePtr type_) { } +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, String comment_) + : name(std::move(name_)), type(std::move(type_)), comment(comment_) +{ +} + +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, String comment_) + : name(std::move(name_)), type(std::move(type_)), comment(comment_), codec(codec_) +{ +} + bool ColumnDescription::operator==(const ColumnDescription & other) const { auto ast_to_str = [](const ASTPtr & ast) { return ast ? 
queryToString(ast) : String{}; }; @@ -140,7 +150,7 @@ void ColumnDescription::readText(ReadBuffer & buf) comment = col_ast->comment->as().value.get(); if (col_ast->codec) - codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true); + codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true, true); if (col_ast->ttl) ttl = col_ast->ttl; @@ -150,16 +160,18 @@ void ColumnDescription::readText(ReadBuffer & buf) } } -ColumnsDescription::ColumnsDescription(std::initializer_list ordinary) +ColumnsDescription::ColumnsDescription(std::initializer_list ordinary) { - for (const auto & elem : ordinary) - add(ColumnDescription(elem.name, elem.type)); + for (auto && elem : ordinary) + add(elem); } -ColumnsDescription::ColumnsDescription(NamesAndTypes ordinary) +ColumnsDescription ColumnsDescription::fromNamesAndTypes(NamesAndTypes ordinary) { + ColumnsDescription result; for (auto & elem : ordinary) - add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + result.add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + return result; } ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary) @@ -173,6 +185,11 @@ ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, NamesAndAlias for (auto & elem : ordinary) add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + setAliases(std::move(aliases)); +} + +void ColumnsDescription::setAliases(NamesAndAliases aliases) +{ for (auto & alias : aliases) { ColumnDescription description(std::move(alias.name), std::move(alias.type)); diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 4de8aa11de3..9a133f81d7a 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -90,6 +90,8 @@ struct ColumnDescription ColumnDescription(ColumnDescription &&) = default; ColumnDescription(const ColumnDescription &) = default; ColumnDescription(String name_, DataTypePtr type_); + ColumnDescription(String name_, DataTypePtr type_, String comment_); + ColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, String comment_); bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } @@ -105,14 +107,16 @@ class ColumnsDescription : public IHints<> public: ColumnsDescription() = default; - ColumnsDescription(std::initializer_list ordinary); - - explicit ColumnsDescription(NamesAndTypes ordinary); + static ColumnsDescription fromNamesAndTypes(NamesAndTypes ordinary); explicit ColumnsDescription(NamesAndTypesList ordinary); + explicit ColumnsDescription(std::initializer_list ordinary); + explicit ColumnsDescription(NamesAndTypesList ordinary, NamesAndAliases aliases); + void setAliases(NamesAndAliases aliases); + /// `after_column` can be a Nested column name; void add(ColumnDescription column, const String & after_column = String(), bool first = false, bool add_subcolumns = true); /// `column_name` can be a Nested column name; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index acdc6f142a4..650539ef1e9 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -740,7 +740,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const if (compression_method == "ZSTD") compression_level = 
settings.network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_deflate_qpl_codec); + CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_deflate_qpl_codec, settings.enable_zstd_qat_codec); CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); /// tmp directory is used to ensure atomicity of transactions diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 642ad206097..12b32b740de 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -2,15 +2,7 @@ #include #include #include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#include #if USE_HDFS #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 974b2bb68cf..c1c327de74a 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -3,6 +3,7 @@ #if USE_HDFS #include +#include #include #include @@ -48,15 +49,6 @@ #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace fs = std::filesystem; namespace ProfileEvents @@ -724,13 +716,13 @@ public: const CompressionMethod compression_method) : SinkToStorage(sample_block) { + const auto & settings = context->getSettingsRef(); write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique( - uri, - context->getGlobalContext()->getConfigRef(), - context->getSettingsRef().hdfs_replication, - context->getWriteSettings()), - compression_method, 3); + uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), + compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 88603d56ebb..85ef6a0bb35 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -165,11 +165,11 @@ void IStorage::readFromPipe( if (pipe.empty()) { auto header = storage_snapshot->getSampleBlockForColumns(column_names); - InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, context); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info); } else { - auto read_step = std::make_unique(std::move(pipe), storage_name, query_info, context); + auto read_step = std::make_unique(std::move(pipe), storage_name, context, query_info); query_plan.addStep(std::move(read_step)); } } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index c54947dde8e..95b7c17ae78 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include @@ -169,10 +168,19 @@ void Service::processQuery(const HTMLForm & params, 
ReadBuffer & /*body*/, Write String remote_fs_metadata = parse(params.get("remote_fs_metadata", "")); - std::regex re("\\s*,\\s*"); - Strings capability( - std::sregex_token_iterator(remote_fs_metadata.begin(), remote_fs_metadata.end(), re, -1), - std::sregex_token_iterator()); + /// Tokenize capabilities from remote_fs_metadata + /// E.g. remote_fs_metadata = "local, s3_plain, web" --> capabilities = ["local", "s3_plain", "web"] + Strings capabilities; + const String delimiter(", "); + size_t pos_start = 0; + size_t pos_end; + while ((pos_end = remote_fs_metadata.find(delimiter, pos_start)) != std::string::npos) + { + const String token = remote_fs_metadata.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delimiter.size(); + capabilities.push_back(token); + } + capabilities.push_back(remote_fs_metadata.substr(pos_start)); bool send_projections = client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION; @@ -188,9 +196,9 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) { auto disk_type = part->getDataPartStorage().getDiskType(); - if (part->getDataPartStorage().supportZeroCopyReplication() && std::find(capability.begin(), capability.end(), disk_type) != capability.end()) + if (part->getDataPartStorage().supportZeroCopyReplication() && std::find(capabilities.begin(), capabilities.end(), disk_type) != capabilities.end()) { - /// Send metadata if the receiver's capability covers the source disk type. + /// Send metadata if the receiver's capabilities cover the source disk type. response.addCookie({"remote_fs_metadata", disk_type}); sendPartFromDisk(part, out, client_protocol_version, true, send_projections); return; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4ad6c564a18..e3de926570b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6888,7 +6888,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( ContextPtr query_context, QueryProcessingStage::Enum to_stage, const StorageSnapshotPtr &, - SelectQueryInfo & query_info) const + SelectQueryInfo &) const { if (query_context->getClientInfo().collaborate_with_initiator) return QueryProcessingStage::Enum::FetchColumns; @@ -6905,11 +6905,6 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( return QueryProcessingStage::Enum::WithMergeableState; } - if (to_stage >= QueryProcessingStage::Enum::WithMergeableState) - { - query_info.projection = std::nullopt; - } - return QueryProcessingStage::Enum::FetchColumns; } @@ -6929,13 +6924,12 @@ UInt64 MergeTreeData::estimateNumberOfRowsToRead( query_info.prewhere_info, storage_snapshot->getMetadataForQuery()->getColumns().getAll().getNames(), storage_snapshot->metadata, - storage_snapshot->metadata, query_info, added_filter_nodes, query_context, query_context->getSettingsRef().max_threads); - UInt64 total_rows = result_ptr->rows(); + UInt64 total_rows = result_ptr->selected_rows; if (query_info.limit > 0 && query_info.limit < total_rows) total_rows = query_info.limit; return total_rows; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index f78b383e173..8c03aef6f99 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -239,8 +239,8 @@
MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart if (!best_partition_id_to_optimize.empty()) res.emplace(std::move(best_partition_id_to_optimize)); - LOG_TRACE(log, "Checked {} partitions, found {} partitions with parts that may be merged: [{}]" - "(max_total_size_to_merge={}, merge_with_ttl_allowed{})", + LOG_TRACE(log, "Checked {} partitions, found {} partitions with parts that may be merged: [{}] " + "(max_total_size_to_merge={}, merge_with_ttl_allowed={})", all_partition_ids.size(), res.size(), fmt::join(res, ", "), max_total_size_to_merge, merge_with_ttl_allowed); return res; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d5b9b4423a9..66f593bbf33 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -125,22 +125,6 @@ static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTSampleRatio::Ra return std::min(RelativeSize(1), RelativeSize(absolute_sample_size) / RelativeSize(approx_total_rows)); } -static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query) -{ - SortDescription order_descr; - order_descr.reserve(query.groupBy()->children.size()); - - for (const auto & elem : query.groupBy()->children) - { - /// Note, here aliases should not be used, since there will be no such column in a block. - String name = elem->getColumnNameWithoutAlias(); - order_descr.emplace_back(name, 1, 1); - } - - return order_descr; -} - - QueryPlanPtr MergeTreeDataSelectExecutor::read( const Names & column_names_to_return, const StorageSnapshotPtr & storage_snapshot, @@ -148,339 +132,29 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( ContextPtr context, const UInt64 max_block_size, const size_t num_streams, - QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read, bool enable_parallel_reading) const { - if (query_info.merge_tree_empty_result) - return std::make_unique(); - - const auto & settings = context->getSettingsRef(); - - const auto & metadata_for_reading = storage_snapshot->getMetadataForQuery(); - const auto & snapshot_data = assert_cast(*storage_snapshot->data); - const auto & parts = snapshot_data.parts; const auto & alter_conversions = snapshot_data.alter_conversions; - if (!query_info.projection) - { - auto step = readFromParts( - query_info.merge_tree_select_result_ptr ? MergeTreeData::DataPartsVector{} : parts, - query_info.merge_tree_select_result_ptr ? std::vector{} : alter_conversions, - column_names_to_return, - storage_snapshot, - query_info, - context, - max_block_size, - num_streams, - max_block_numbers_to_read, - query_info.merge_tree_select_result_ptr, - enable_parallel_reading); - - auto plan = std::make_unique(); - if (step) - plan->addStep(std::move(step)); - return plan; - } - - LOG_DEBUG( - log, - "Choose {} {} projection {}", - query_info.projection->complete ? 
"complete" : "incomplete", - query_info.projection->desc->type, - query_info.projection->desc->name); - - const ASTSelectQuery & select_query = query_info.query->as(); - QueryPlanResourceHolder resources; - - auto projection_plan = std::make_unique(); - if (query_info.projection->desc->is_minmax_count_projection) - { - Pipe pipe(std::make_shared(query_info.minmax_count_projection_block)); - auto read_from_pipe = std::make_unique(std::move(pipe)); - projection_plan->addStep(std::move(read_from_pipe)); - } - else if (query_info.projection->merge_tree_projection_select_result_ptr) - { - LOG_DEBUG(log, "projection required columns: {}", fmt::join(query_info.projection->required_columns, ", ")); - projection_plan->addStep(readFromParts( - /*parts=*/ {}, - /*alter_conversions=*/ {}, - query_info.projection->required_columns, - storage_snapshot, - query_info, - context, - max_block_size, - num_streams, - max_block_numbers_to_read, - query_info.projection->merge_tree_projection_select_result_ptr, - enable_parallel_reading)); - } - - if (projection_plan->isInitialized()) - { - if (query_info.projection->before_where) - { - auto where_step = std::make_unique( - projection_plan->getCurrentDataStream(), - query_info.projection->before_where, - query_info.projection->where_column_name, - query_info.projection->remove_where_filter); - - where_step->setStepDescription("WHERE"); - projection_plan->addStep(std::move(where_step)); - } - - if (query_info.projection->before_aggregation) - { - auto expression_before_aggregation - = std::make_unique(projection_plan->getCurrentDataStream(), query_info.projection->before_aggregation); - expression_before_aggregation->setStepDescription("Before GROUP BY"); - projection_plan->addStep(std::move(expression_before_aggregation)); - } - - /// NOTE: input_order_info (for projection and not) is set only if projection is complete - if (query_info.has_order_by && !query_info.need_aggregate && query_info.projection->input_order_info) - { - chassert(query_info.projection->complete); - - SortDescription output_order_descr = InterpreterSelectQuery::getSortDescription(select_query, context); - UInt64 limit = InterpreterSelectQuery::getLimitForSorting(select_query, context); - - auto sorting_step = std::make_unique( - projection_plan->getCurrentDataStream(), - query_info.projection->input_order_info->sort_description_for_merging, - output_order_descr, - settings.max_block_size, - limit); - - sorting_step->setStepDescription("ORDER BY for projections"); - projection_plan->addStep(std::move(sorting_step)); - } - } - - auto ordinary_query_plan = std::make_unique(); - if (query_info.projection->merge_tree_normal_select_result_ptr) - { - auto storage_from_base_parts_of_projection - = std::make_shared(data, query_info.projection->merge_tree_normal_select_result_ptr); - auto interpreter = InterpreterSelectQuery( - query_info.query, - context, - storage_from_base_parts_of_projection, - nullptr, - SelectQueryOptions{processed_stage}.projectionQuery()); - - interpreter.buildQueryPlan(*ordinary_query_plan); - - const auto & expressions = interpreter.getAnalysisResult(); - if (processed_stage == QueryProcessingStage::Enum::FetchColumns && expressions.before_where) - { - auto where_step = std::make_unique( - ordinary_query_plan->getCurrentDataStream(), - expressions.before_where, - expressions.where_column_name, - expressions.remove_where_filter); - where_step->setStepDescription("WHERE"); - ordinary_query_plan->addStep(std::move(where_step)); - } - } - - Pipe projection_pipe; - Pipe 
ordinary_pipe; - if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) - { - auto make_aggregator_params = [&](bool projection) - { - const auto & keys = query_info.projection->aggregation_keys.getNames(); - - AggregateDescriptions aggregates = query_info.projection->aggregate_descriptions; - - /// This part is hacky. - /// We want AggregatingTransform to work with aggregate states instead of normal columns. - /// It is almost the same, just instead of adding new data to aggregation state we merge it with existing. - /// - /// It is needed because data in projection: - /// * is not merged completely (we may have states with the same key in different parts) - /// * is not split into buckets (so if we just use MergingAggregated, it will use single thread) - const bool only_merge = projection; - - Aggregator::Params params( - keys, - aggregates, - query_info.projection->aggregate_overflow_row, - settings.max_rows_to_group_by, - settings.group_by_overflow_mode, - settings.group_by_two_level_threshold, - settings.group_by_two_level_threshold_bytes, - settings.max_bytes_before_external_group_by, - settings.empty_result_for_aggregation_by_empty_set, - context->getTempDataOnDisk(), - settings.max_threads, - settings.min_free_disk_space_for_temporary_data, - settings.compile_aggregate_expressions, - settings.min_count_to_compile_aggregate_expression, - settings.max_block_size, - settings.enable_software_prefetch_in_aggregation, - only_merge, - settings.optimize_group_by_constant_keys, - settings.min_hit_rate_to_use_consecutive_keys_optimization, - /*stats_collecting_params=*/ {}); - - return std::make_pair(params, only_merge); - }; - - if (ordinary_query_plan->isInitialized() && projection_plan->isInitialized()) - { - auto projection_builder = projection_plan->buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - projection_pipe = QueryPipelineBuilder::getPipe(std::move(*projection_builder), resources); - - auto ordinary_builder = ordinary_query_plan->buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - ordinary_pipe = QueryPipelineBuilder::getPipe(std::move(*ordinary_builder), resources); - - /// Here we create shared ManyAggregatedData for both projection and ordinary data. - /// For ordinary data, AggregatedData is filled in a usual way. - /// For projection data, AggregatedData is filled by merging aggregation states. - /// When all AggregatedData is filled, we merge aggregation states together in a usual way. - /// Pipeline will look like: - /// ReadFromProjection -> Aggregating (only merge states) -> - /// ReadFromProjection -> Aggregating (only merge states) -> - /// ... -> Resize -> ConvertingAggregatedToChunks - /// ReadFromOrdinaryPart -> Aggregating (usual) -> (added by last Aggregating) - /// ReadFromOrdinaryPart -> Aggregating (usual) -> - /// ... 
- auto many_data = std::make_shared(projection_pipe.numOutputPorts() + ordinary_pipe.numOutputPorts()); - size_t counter = 0; - - AggregatorListPtr aggregator_list_ptr = std::make_shared(); - - /// TODO apply optimize_aggregation_in_order here too (like below) - auto build_aggregate_pipe = [&](Pipe & pipe, bool projection) - { - auto [params, only_merge] = make_aggregator_params(projection); - - AggregatingTransformParamsPtr transform_params = std::make_shared( - pipe.getHeader(), std::move(params), aggregator_list_ptr, query_info.projection->aggregate_final); - - pipe.resize(pipe.numOutputPorts(), true, true); - - auto merge_threads = num_streams; - auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads - ? static_cast(settings.aggregation_memory_efficient_merge_threads) - : static_cast(settings.max_threads); - - pipe.addSimpleTransform([&](const Block & header) - { - return std::make_shared( - header, transform_params, many_data, counter++, merge_threads, temporary_data_merge_threads); - }); - }; - - if (!projection_pipe.empty()) - build_aggregate_pipe(projection_pipe, true); - if (!ordinary_pipe.empty()) - build_aggregate_pipe(ordinary_pipe, false); - } - else - { - auto add_aggregating_step = [&](QueryPlanPtr & query_plan, bool projection) - { - auto [params, only_merge] = make_aggregator_params(projection); - - auto merge_threads = num_streams; - auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads - ? static_cast(settings.aggregation_memory_efficient_merge_threads) - : static_cast(settings.max_threads); - - InputOrderInfoPtr group_by_info = query_info.projection->input_order_info; - SortDescription sort_description_for_merging; - SortDescription group_by_sort_description; - if (group_by_info && settings.optimize_aggregation_in_order) - { - group_by_sort_description = getSortDescriptionFromGroupBy(select_query); - sort_description_for_merging = group_by_info->sort_description_for_merging; - } - else - group_by_info = nullptr; - - // We don't have information regarding the `to_stage` of the query processing, only about `from_stage` (which is passed through `processed_stage` argument). - // Thus we cannot assign false here since it may be a query over distributed table. 
- const bool should_produce_results_in_order_of_bucket_number = true; - - auto aggregating_step = std::make_unique( - query_plan->getCurrentDataStream(), - std::move(params), - /* grouping_sets_params_= */ GroupingSetsParamsList{}, - query_info.projection->aggregate_final, - settings.max_block_size, - settings.aggregation_in_order_max_block_bytes, - merge_threads, - temporary_data_merge_threads, - /* storage_has_evenly_distributed_read_= */ false, - /* group_by_use_nulls */ false, - std::move(sort_description_for_merging), - std::move(group_by_sort_description), - should_produce_results_in_order_of_bucket_number, - settings.enable_memory_bound_merging_of_aggregation_results, - !group_by_info && settings.force_aggregation_in_order); - query_plan->addStep(std::move(aggregating_step)); - }; - - if (projection_plan->isInitialized()) - { - add_aggregating_step(projection_plan, true); - - auto projection_builder = projection_plan->buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - projection_pipe = QueryPipelineBuilder::getPipe(std::move(*projection_builder), resources); - } - if (ordinary_query_plan->isInitialized()) - { - add_aggregating_step(ordinary_query_plan, false); - - auto ordinary_builder = ordinary_query_plan->buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - ordinary_pipe = QueryPipelineBuilder::getPipe(std::move(*ordinary_builder), resources); - } - } - } - else - { - if (projection_plan->isInitialized()) - { - auto projection_builder = projection_plan->buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - projection_pipe = QueryPipelineBuilder::getPipe(std::move(*projection_builder), resources); - } - - if (ordinary_query_plan->isInitialized()) - { - auto ordinary_builder = ordinary_query_plan->buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - ordinary_pipe = QueryPipelineBuilder::getPipe(std::move(*ordinary_builder), resources); - } - } - - Pipes pipes; - pipes.emplace_back(std::move(projection_pipe)); - pipes.emplace_back(std::move(ordinary_pipe)); - auto pipe = Pipe::unitePipes(std::move(pipes)); - auto plan = std::make_unique(); - if (pipe.empty()) - return plan; - - pipe.resize(1); - auto step = std::make_unique( - std::move(pipe), - fmt::format("MergeTree(with {} projection {})", query_info.projection->desc->type, query_info.projection->desc->name), + auto step = readFromParts( + parts, + alter_conversions, + column_names_to_return, + storage_snapshot, query_info, - context); - plan->addStep(std::move(step)); - plan->addInterpreterContext(query_info.projection->context); + context, + max_block_size, + num_streams, + max_block_numbers_to_read, + /*merge_tree_select_result_ptr=*/ nullptr, + enable_parallel_reading); + + auto plan = std::make_unique(); + if (step) + plan->addStep(std::move(step)); return plan; } @@ -839,6 +513,7 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( if (metadata_snapshot->hasPartitionKey()) { + chassert(minmax_idx_condition && partition_pruner); const auto & partition_key = metadata_snapshot->getPartitionKey(); minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); @@ -1256,11 +931,10 @@ static void selectColumnNames( } } -MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( 
+ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, const PrewhereInfoPtr & prewhere_info, const Names & column_names_to_return, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, @@ -1270,8 +944,7 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar { size_t total_parts = parts.size(); if (total_parts == 0) - return std::make_shared( - MergeTreeDataSelectAnalysisResult{.result = ReadFromMergeTree::AnalysisResult()}); + return std::make_shared(); Names real_column_names; Names virt_column_names; @@ -1289,7 +962,6 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar /*alter_conversions=*/ {}, prewhere_info, added_filter_nodes, - metadata_snapshot_base, metadata_snapshot, query_info, context, @@ -1312,13 +984,13 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( const UInt64 max_block_size, const size_t num_streams, std::shared_ptr max_block_numbers_to_read, - MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr, + ReadFromMergeTree::AnalysisResultPtr merge_tree_select_result_ptr, bool enable_parallel_reading) const { /// If merge_tree_select_result_ptr != nullptr, we use analyzed result so parts will always be empty. if (merge_tree_select_result_ptr) { - if (merge_tree_select_result_ptr->marks() == 0) + if (merge_tree_select_result_ptr->selected_marks == 0) return {}; } else if (parts.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 4c6e1086cbc..ba1f20054f0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -34,7 +34,6 @@ public: ContextPtr context, UInt64 max_block_size, size_t num_streams, - QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read = nullptr, bool enable_parallel_reading = false) const; @@ -49,17 +48,16 @@ public: UInt64 max_block_size, size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr, - MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr = nullptr, + ReadFromMergeTree::AnalysisResultPtr merge_tree_select_result_ptr = nullptr, bool enable_parallel_reading = false) const; /// Get an estimation for the number of marks we are going to read. /// Reads nothing. Secondary indexes are not used. /// This method is used to select best projection for table. - MergeTreeDataSelectAnalysisResultPtr estimateNumMarksToRead( + ReadFromMergeTree::AnalysisResultPtr estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, const PrewhereInfoPtr & prewhere_info, const Names & column_names, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 6606e4d738e..bbb38346f38 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -37,7 +37,7 @@ public: } /// Used in queries with projection. 
- StorageFromMergeTreeDataPart(const MergeTreeData & storage_, MergeTreeDataSelectAnalysisResultPtr analysis_result_ptr_) + StorageFromMergeTreeDataPart(const MergeTreeData & storage_, ReadFromMergeTree::AnalysisResultPtr analysis_result_ptr_) : IStorage(storage_.getStorageID()), storage(storage_), analysis_result_ptr(analysis_result_ptr_) { setInMemoryMetadata(storage.getInMemoryMetadata()); @@ -127,7 +127,7 @@ private: const std::vector alter_conversions; const MergeTreeData & storage; const String partition_id; - const MergeTreeDataSelectAnalysisResultPtr analysis_result_ptr; + const ReadFromMergeTree::AnalysisResultPtr analysis_result_ptr; static StorageID getIDFromPart(const MergeTreeData::DataPartPtr & part_) { diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index cd7e99a6d18..9cb1fbd8506 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -347,11 +347,11 @@ void StorageNATS::read( if (pipe.empty()) { auto header = storage_snapshot->getSampleBlockForColumns(column_names); - InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, local_context); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info); } else { - auto read_step = std::make_unique(std::move(pipe), getName(), query_info, local_context); + auto read_step = std::make_unique(std::move(pipe), getName(), local_context, query_info); query_plan.addStep(std::move(read_step)); query_plan.addInterpreterContext(modified_context); } diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index 3d0ff5d8dab..657ad91e825 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -3,10 +3,11 @@ #include #include #include +#include #include #include #include -#include + namespace ErrorCodes { @@ -96,7 +97,7 @@ void validateNamedCollection( const NamedCollection & collection, const Keys & required_keys, const Keys & optional_keys, - const std::vector & optional_regex_keys = {}) + const std::vector> & optional_regex_keys = {}) { NamedCollection::Keys keys = collection.getKeys(); auto required_keys_copy = required_keys; @@ -119,7 +120,7 @@ void validateNamedCollection( auto match = std::find_if( optional_regex_keys.begin(), optional_regex_keys.end(), - [&](const std::regex & regex) { return std::regex_search(key, regex); }) + [&](const std::shared_ptr & regex) { return re2::RE2::PartialMatch(key, *regex); }) != optional_regex_keys.end(); if (!match) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ec2e002b285..fce2d775b15 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -700,7 +700,7 @@ void StorageRabbitMQ::read( if (num_created_consumers == 0) { auto header = storage_snapshot->getSampleBlockForColumns(column_names); - InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, local_context); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info); return; } @@ -758,11 +758,11 @@ void StorageRabbitMQ::read( if (pipe.empty()) { auto header = storage_snapshot->getSampleBlockForColumns(column_names); - InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, local_context); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info); } else { - auto read_step = std::make_unique(std::move(pipe), getName(), query_info, local_context); 
+ auto read_step = std::make_unique(std::move(pipe), getName(), local_context, query_info); query_plan.addStep(std::move(read_step)); query_plan.addInterpreterContext(modified_context); } diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index 28053c84e20..e1d52eefc20 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -64,7 +64,7 @@ void readFinalFromNestedStorage( if (!query_plan.isInitialized()) { - InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, nested_header, query_info, context); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, nested_header, query_info); return; } diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index cbb96ed4001..d0533b5ba0c 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -27,13 +27,14 @@ namespace DB { -NamesAndTypesList StorageSystemRocksDB::getNamesAndTypes() +ColumnsDescription StorageSystemRocksDB::getColumnsDescription() { - return { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "value", std::make_shared() }, + return ColumnsDescription + { + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Name of the table with StorageEmbeddedRocksDB engine."}, + {"name", std::make_shared(), "Metric name."}, + {"value", std::make_shared(), "Metric value."}, }; } diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.h b/src/Storages/RocksDB/StorageSystemRocksDB.h index deafba069f4..c1f10a7722d 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.h +++ b/src/Storages/RocksDB/StorageSystemRocksDB.h @@ -16,7 +16,7 @@ class StorageSystemRocksDB final : public IStorageSystemOneBlock #include #include -#include #include @@ -43,9 +42,6 @@ using ReadInOrderOptimizerPtr = std::shared_ptr; class Cluster; using ClusterPtr = std::shared_ptr; -struct MergeTreeDataSelectAnalysisResult; -using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr; - struct PrewhereInfo { /// Actions for row level security filter. Applied separately before prewhere_actions. @@ -142,32 +138,6 @@ class IMergeTreeDataPart; using ManyExpressionActions = std::vector; -// The projection selected to execute current query -struct ProjectionCandidate -{ - ProjectionDescriptionRawPtr desc{}; - PrewhereInfoPtr prewhere_info; - ActionsDAGPtr before_where; - String where_column_name; - bool remove_where_filter = false; - ActionsDAGPtr before_aggregation; - Names required_columns; - NamesAndTypesList aggregation_keys; - AggregateDescriptions aggregate_descriptions; - bool aggregate_overflow_row = false; - bool aggregate_final = false; - bool complete = false; - ReadInOrderOptimizerPtr order_optimizer; - InputOrderInfoPtr input_order_info; - ManyExpressionActions group_by_elements_actions; - SortDescription group_by_elements_order_descr; - MergeTreeDataSelectAnalysisResultPtr merge_tree_projection_select_result_ptr; - MergeTreeDataSelectAnalysisResultPtr merge_tree_normal_select_result_ptr; - - /// Because projection analysis uses a separate interpreter. - ContextPtr context; -}; - /** Query along with some additional data, * that can be used during query processing * inside storage engines. 
@@ -180,7 +150,6 @@ struct SelectQueryInfo ASTPtr query; ASTPtr view_query; /// Optimized VIEW query - ASTPtr original_query; /// Unmodified query for projection analysis /// Query tree QueryTreeNodePtr query_tree; @@ -207,8 +176,6 @@ struct SelectQueryInfo /// /// Configured in StorageDistributed::getQueryProcessingStage() ClusterPtr optimized_cluster; - /// should we use custom key with the cluster - bool use_custom_key = false; TreeRewriterResultPtr syntax_analyzer_result; @@ -242,18 +209,10 @@ struct SelectQueryInfo ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; } - /// If not null, it means we choose a projection to execute current query. - std::optional projection; - bool ignore_projections = false; - bool is_projection_query = false; - bool merge_tree_empty_result = false; bool settings_limit_offset_done = false; bool is_internal = false; - Block minmax_count_projection_block; - MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; - + bool parallel_replicas_disabled = false; bool is_parameterized_view = false; - bool optimize_trivial_count = false; // If limit is not 0, that means it's a trivial limit query. @@ -262,11 +221,6 @@ struct SelectQueryInfo /// For IStorageSystemOneBlock std::vector columns_mask; - InputOrderInfoPtr getInputOrderInfo() const - { - return input_order_info ? input_order_info : (projection ? projection->input_order_info : nullptr); - } - bool isFinal() const; }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 4f63b144f43..c7fbb633a82 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -40,15 +41,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - using namespace Azure::Storage::Blobs; namespace CurrentMetrics @@ -535,7 +527,12 @@ public: , format_settings(format_settings_) { StoredObject object(blob_path); - write_buf = wrapWriteBufferWithCompressionMethod(object_storage->writeObject(object, WriteMode::Rewrite), compression_method, 3); + const auto & settings = context->getSettingsRef(); + write_buf = wrapWriteBufferWithCompressionMethod( + object_storage->writeObject(object, WriteMode::Rewrite), + compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); } diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index cc908fa3215..16e5b9edfb6 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -4,6 +4,7 @@ #if USE_AZURE_BLOB_STORAGE +#include #include #include #include @@ -14,15 +15,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index ba5d922dc86..6f4b1563a46 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -212,8 +212,6 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage( { if (auto destination = getDestinationTable()) 
{ - /// TODO: Find a way to support projections for StorageBuffer - query_info.ignore_projections = true; const auto & destination_metadata = destination->getInMemoryMetadataPtr(); return destination->getQueryProcessingStage(local_context, to_stage, destination->getStorageSnapshot(destination_metadata, local_context), query_info); } @@ -337,12 +335,12 @@ void StorageBuffer::read( pipes_from_buffers.emplace_back(std::make_shared(column_names, buf, storage_snapshot)); pipe_from_buffers = Pipe::unitePipes(std::move(pipes_from_buffers)); - if (query_info.getInputOrderInfo()) + if (query_info.input_order_info) { /// Each buffer has one block, and it not guaranteed that rows in each block are sorted by order keys pipe_from_buffers.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, query_info.getInputOrderInfo()->sort_description_for_merging, 0); + return std::make_shared(header, query_info.input_order_info->sort_description_for_merging, 0); }); } } @@ -360,7 +358,7 @@ void StorageBuffer::read( /// TODO: Find a way to support projections for StorageBuffer auto interpreter = InterpreterSelectQuery( query_info.query, local_context, std::move(pipe_from_buffers), - SelectQueryOptions(processed_stage).ignoreProjections()); + SelectQueryOptions(processed_stage)); interpreter.addStorageLimits(*query_info.storage_limits); interpreter.buildQueryPlan(buffers_plan); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7ef2ff08827..a829002187b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -429,15 +429,10 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( size_t nodes = getClusterQueriedNodes(settings, cluster); - if (query_info.use_custom_key) - { - LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into virtual shards"); - query_info.cluster = cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); - } - else - { - query_info.cluster = cluster; + query_info.cluster = cluster; + if (!local_context->canUseParallelReplicasCustomKey(*cluster)) + { if (nodes > 1 && settings.optimize_skip_unused_shards) { /// Always calculate optimized cluster here, to avoid conditions during read() @@ -880,30 +875,22 @@ void StorageDistributed::read( storage_snapshot, processed_stage); - auto settings = local_context->getSettingsRef(); + const auto & settings = local_context->getSettingsRef(); ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (query_info.use_custom_key) + if (local_context->canUseParallelReplicasCustomKey(*query_info.getCluster())) { if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) { - if (query_info.getCluster()->getShardCount() == 1) - { - // we are reading from single shard with multiple replicas but didn't transform replicas - // into virtual shards with custom_key set - throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicas weren't transformed into virtual shards"); - } - additional_shard_filter_generator = - [&, my_custom_key_ast = std::move(custom_key_ast), shard_count = query_info.cluster->getShardCount()](uint64_t shard_num) -> ASTPtr + [my_custom_key_ast = std::move(custom_key_ast), + column_description = this->getInMemoryMetadataPtr()->columns, + custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + context = local_context, + replica_count = 
query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr { return getCustomKeyFilterForParallelReplica( - shard_count, - shard_num - 1, - my_custom_key_ast, - settings.parallel_replicas_custom_key_filter_type, - this->getInMemoryMetadataPtr()->columns, - local_context); + replica_count, replica_num - 1, my_custom_key_ast, custom_key_type, column_description, context); }; } } diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index d493fead993..beb93afc972 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -73,7 +73,7 @@ void StorageExternalDistributed::read( if (plans.empty()) { auto header = storage_snapshot->getSampleBlockForColumns(column_names); - InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, context); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info); } if (plans.size() == 1) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 3c1e13679b5..9f864813de9 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -57,15 +58,6 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace ProfileEvents { extern const Event CreatedReadBufferOrdinary; @@ -1397,7 +1389,7 @@ void StorageFile::read( throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p->at(0)); auto header = storage_snapshot->getSampleBlockForColumns(column_names); - InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, context); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info); return; } } @@ -1575,8 +1567,12 @@ public: /// In case of formats with prefixes if file is not empty we have already written prefix. bool do_not_write_prefix = naked_buffer->size(); - - write_buf = wrapWriteBufferWithCompressionMethod(std::move(naked_buffer), compression_method, 3); + const auto & settings = context->getSettingsRef(); + write_buf = wrapWriteBufferWithCompressionMethod( + std::move(naked_buffer), + compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, *write_buf, metadata_snapshot->getSampleBlock(), context, format_settings); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index b06fe00f5b2..ae616b1df04 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -149,10 +149,6 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( const StorageSnapshotPtr &, SelectQueryInfo & query_info) const { - /// TODO: Find a way to support projections for StorageMaterializedView. Why do we use different - /// metadata for materialized view and target table? If they are the same, we can get rid of all - /// converting and use it just like a normal view. 
- query_info.ignore_projections = true; const auto & target_metadata = getTargetTable()->getInMemoryMetadataPtr(); return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getStorageSnapshot(target_metadata, local_context), query_info); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 5d4f50baa53..0d67403fa2f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -278,8 +278,6 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( size_t selected_table_size = 0; - /// TODO: Find a way to support projections for StorageMerge - query_info.ignore_projections = true; for (const auto & iterator : database_table_iterators) { while (iterator->isValid()) @@ -854,7 +852,8 @@ QueryPlan ReadFromMerge::createPlanForTable( { InterpreterSelectQueryAnalyzer interpreter(modified_query_info.query_tree, modified_context, - SelectQueryOptions(processed_stage).ignoreProjections()); + SelectQueryOptions(processed_stage)); + auto & planner = interpreter.getPlanner(); planner.buildQueryPlanIfNeeded(); plan = std::move(planner).extractQueryPlan(); @@ -865,7 +864,8 @@ QueryPlan ReadFromMerge::createPlanForTable( /// TODO: Find a way to support projections for StorageMerge InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, - SelectQueryOptions(processed_stage).ignoreProjections()}; + SelectQueryOptions(processed_stage)}; + interpreter.buildQueryPlan(plan); } } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b8804ad3c6d..4761ccd8b58 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -250,7 +250,6 @@ void StorageMergeTree::read( local_context, max_block_size, num_streams, - processed_stage, nullptr, enable_parallel_reading)) query_plan = std::move(*plan); diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 269ddf57fa2..18a1f9086ae 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -38,8 +38,6 @@ public: const StorageSnapshotPtr &, SelectQueryInfo & info) const override { - /// TODO: Find a way to support projections for StorageProxy - info.ignore_projections = true; const auto & nested_metadata = getNested()->getInMemoryMetadataPtr(); return getNested()->getQueryProcessingStage(context, to_stage, getNested()->getStorageSnapshot(nested_metadata, context), info); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a8404052c59..1d9a50b18b7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5345,12 +5345,12 @@ void StorageReplicatedMergeTree::read( /// 2. Do not read parts that have not yet been written to the quorum of the replicas. /// For this you have to synchronously go to ZooKeeper. 
if (settings.select_sequential_consistency) - return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); if (local_context->canUseParallelReplicasOnInitiator()) return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage); - readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); } void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( @@ -5359,14 +5359,15 @@ void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, - QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) { auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); - auto plan = reader.read(column_names, storage_snapshot, query_info, local_context, - max_block_size, num_streams, processed_stage, std::move(max_added_blocks), - /* enable_parallel_reading= */false); + auto plan = reader.read( + column_names, storage_snapshot, query_info, local_context, + max_block_size, num_streams, std::move(max_added_blocks), + /* enable_parallel_reading=*/ false); + if (plan) query_plan = std::move(*plan); } @@ -5420,16 +5421,15 @@ void StorageReplicatedMergeTree::readLocalImpl( const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, - QueryProcessingStage::Enum processed_stage, const size_t max_block_size, const size_t num_streams) { auto plan = reader.read( column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams, - processed_stage, /* max_block_numbers_to_read= */ nullptr, /* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower()); + if (plan) query_plan = std::move(*plan); } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 2bd1fcbc693..fb74097d768 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -561,7 +561,6 @@ private: const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, - QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams); @@ -571,7 +570,6 @@ private: const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, - QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index d7cc86ed321..3ddbfe8d894 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -856,6 +858,7 @@ public: blob_log->query_id = context->getCurrentQueryId(); } + const auto & settings = context->getSettingsRef(); write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique( configuration_.client, @@ -868,7 +871,8 @@ public: threadPoolCallbackRunner(getIOThreadPool().get(), "S3ParallelWrite"), context->getWriteSettings()), compression_method, - 3); + 
static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); } @@ -1377,7 +1381,7 @@ bool StorageS3::Configuration::update(ContextPtr context) request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context->getSettings()); - if (client && (static_configuration || s3_settings.auth_settings == auth_settings)) + if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) return false; auth_settings.updateFrom(s3_settings.auth_settings); @@ -1600,11 +1604,11 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context if (engine_args_to_idx.contains("session_token")) configuration.auth_settings.session_token = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - configuration.auth_settings.no_sign_request = no_sign_request; + if (no_sign_request) + configuration.auth_settings.no_sign_request = no_sign_request; } - configuration.static_configuration = !configuration.auth_settings.access_key_id.empty(); + configuration.static_configuration = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); configuration.keys = {configuration.url.key}; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index c0e4be36202..5041b95ecc8 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,13 +36,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -84,9 +84,9 @@ static const std::unordered_set optional_configuration_keys = /// Headers in config file will have structure "headers.header.name" and "headers.header.value". /// But Poco::AbstractConfiguration converts them into "header", "header[1]", "header[2]". 
-static const std::vector optional_regex_keys = { - std::regex(R"(headers.header\[[\d]*\].name)"), - std::regex(R"(headers.header\[[\d]*\].value)"), +static const std::vector> optional_regex_keys = { + std::make_shared(R"(headers.header\[[0-9]*\].name)"), + std::make_shared(R"(headers.header\[[0-9]*\].value)"), }; static bool urlWithGlobs(const String & uri) @@ -541,11 +541,12 @@ StorageURLSink::StorageURLSink( Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config ); + const auto & settings = context->getSettingsRef(); write_buf = wrapWriteBufferWithCompressionMethod( std::move(write_buffer), compression_method, - 3 - ); + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, format_settings); } diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index e09b27adf32..3b2807965a4 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -38,7 +38,7 @@ public: explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription(Self::getNamesAndTypes(), Self::getNamesAndAliases())); + storage_metadata.setColumns(Self::getColumnsDescription()); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp index 6a7d9a2a948..8e32a137fcb 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp +++ b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp @@ -4,11 +4,12 @@ namespace DB { -NamesAndTypesList StorageSystemAggregateFunctionCombinators::getNamesAndTypes() +ColumnsDescription StorageSystemAggregateFunctionCombinators::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"is_internal", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "The name of the combinator."}, + {"is_internal", std::make_shared(), "Whether this combinator is for internal usage only."}, }; } diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h index a2ef26109a3..6f3f90b58af 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h +++ b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h @@ -19,6 +19,6 @@ public: return "SystemAggregateFunctionCombinators"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemAsyncLoader.cpp b/src/Storages/System/StorageSystemAsyncLoader.cpp index aa182e9a4f3..c56a3c3ce78 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.cpp +++ b/src/Storages/System/StorageSystemAsyncLoader.cpp @@ -45,31 +45,32 @@ namespace } } -NamesAndTypesList StorageSystemAsyncLoader::getNamesAndTypes() +ColumnsDescription StorageSystemAsyncLoader::getColumnsDescription() { - return { - { "job", std::make_shared() }, - { "job_id", std::make_shared() }, - { "dependencies", std::make_shared(std::make_shared()) }, - { "dependencies_left", std::make_shared() }, - { "status", std::make_shared(getTypeEnumValues()) }, - { "is_executing", 
std::make_shared() }, - { "is_blocked", std::make_shared() }, - { "is_ready", std::make_shared() }, - { "elapsed", std::make_shared()}, - { "pool_id", std::make_shared() }, - { "pool", std::make_shared() }, - { "priority", std::make_shared() }, - { "execution_pool_id", std::make_shared() }, - { "execution_pool", std::make_shared() }, - { "execution_priority", std::make_shared() }, - { "ready_seqno", std::make_shared(std::make_shared()) }, - { "waiters", std::make_shared() }, - { "exception", std::make_shared(std::make_shared()) }, - { "schedule_time", std::make_shared(TIME_SCALE) }, - { "enqueue_time", std::make_shared(std::make_shared(TIME_SCALE)) }, - { "start_time", std::make_shared(std::make_shared(TIME_SCALE)) }, - { "finish_time", std::make_shared(std::make_shared(TIME_SCALE)) }, + return ColumnsDescription + { + {"job", std::make_shared(), "Job name (may be not unique)."}, + {"job_id", std::make_shared(), "Unique ID of the job."}, + {"dependencies", std::make_shared(std::make_shared()), "List of IDs of jobs that should be done before this job."}, + {"dependencies_left", std::make_shared(), "Current number of dependencies left to be done."}, + {"status", std::make_shared(getTypeEnumValues()), "Current load status of a job: PENDING: Load job is not started yet. OK: Load job executed and was successful. FAILED: Load job executed and failed. CANCELED: Load job is not going to be executed due to removal or dependency failure."}, + {"is_executing", std::make_shared(), "The job is currently being executed by a worker."}, + {"is_blocked", std::make_shared(), "The job waits for its dependencies to be done."}, + {"is_ready", std::make_shared(), "The job is ready to be executed and waits for a worker."}, + {"elapsed", std::make_shared(), "Seconds elapsed since start of execution. Zero if job is not started. Total execution time if job finished."}, + {"pool_id", std::make_shared(), "ID of a pool currently assigned to the job."}, + {"pool", std::make_shared(), "Name of `pool_id` pool."}, + {"priority", std::make_shared(), "Priority of `pool_id` pool."}, + {"execution_pool_id", std::make_shared(), "ID of a pool the job is executed in. Equals initially assigned pool before execution starts."}, + {"execution_pool", std::make_shared(), "Name of `execution_pool_id` pool."}, + {"execution_priority", std::make_shared(), "Priority of `execution_pool_id` pool."}, + {"ready_seqno", std::make_shared(std::make_shared()), "Not null for ready jobs. Worker pulls the next job to be executed from a ready queue of its pool. If there are multiple ready jobs, then job with the lowest value of `ready_seqno` is picked."}, + {"waiters", std::make_shared(), "The number of threads waiting on this job."}, + {"exception", std::make_shared(std::make_shared()), "Not null for failed and canceled jobs. Holds error message raised during query execution or error leading to cancelling of this job along with dependency failure chain of job names."}, + {"schedule_time", std::make_shared(TIME_SCALE), "Time when job was created and scheduled to be executed (usually with all its dependencies)."}, + {"enqueue_time", std::make_shared(std::make_shared(TIME_SCALE)), "Time when job became ready and was enqueued into a ready queue of its pool. Null if the job is not ready yet."}, + {"start_time", std::make_shared(std::make_shared(TIME_SCALE)), "Time when worker dequeues the job from ready queue and starts its execution. 
Null if the job is not started yet."}, + {"finish_time", std::make_shared(std::make_shared(TIME_SCALE)), "Time when job execution is finished. Null if the job is not finished yet."}, }; } diff --git a/src/Storages/System/StorageSystemAsyncLoader.h b/src/Storages/System/StorageSystemAsyncLoader.h index 28c17392d08..fa0ce11efe3 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.h +++ b/src/Storages/System/StorageSystemAsyncLoader.h @@ -15,7 +15,7 @@ class StorageSystemAsyncLoader final : public IStorageSystemOneBlock()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"format", std::make_shared()}, - {"first_update", std::make_shared(TIME_SCALE)}, - {"total_bytes", std::make_shared()}, - {"entries.query_id", std::make_shared(std::make_shared())}, - {"entries.bytes", std::make_shared(std::make_shared())}, + {"query", std::make_shared(), "Query text."}, + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Table name."}, + {"format", std::make_shared(), "Format name."}, + {"first_update", std::make_shared(TIME_SCALE), "First insert time with microseconds resolution."}, + {"total_bytes", std::make_shared(), "Total number of bytes waiting in the queue."}, + {"entries.query_id", std::make_shared(std::make_shared()), "Array of query ids of the inserts waiting in the queue."}, + {"entries.bytes", std::make_shared(std::make_shared()), "Array of bytes of each insert query waiting in the queue."}, }; } diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.h b/src/Storages/System/StorageSystemAsynchronousInserts.h index 01e51dca6fa..891494ffbeb 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.h +++ b/src/Storages/System/StorageSystemAsynchronousInserts.h @@ -12,7 +12,7 @@ class StorageSystemAsynchronousInserts final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, - {"description", std::make_shared()}, + return ColumnsDescription + { + {"metric", std::make_shared(), "Metric name."}, + {"value", std::make_shared(), "Metric value."}, + {"description", std::make_shared(), "Metric description."}, }; } diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.h b/src/Storages/System/StorageSystemAsynchronousMetrics.h index e5b2070e96f..026377c77a0 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.h +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.h @@ -18,7 +18,7 @@ public: std::string getName() const override { return "SystemAsynchronousMetrics"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); private: const AsynchronousMetrics & async_metrics; diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index a5dd7ea6e0b..17fb56e0a92 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -15,26 +15,26 @@ namespace DB { -NamesAndTypesList StorageSystemBackups::getNamesAndTypes() +ColumnsDescription StorageSystemBackups::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"id", std::make_shared()}, - {"name", std::make_shared()}, - {"base_backup_name", std::make_shared()}, - {"status", std::make_shared(getBackupStatusEnumValues())}, - {"error", std::make_shared()}, - {"start_time", std::make_shared()}, - {"end_time", std::make_shared()}, - {"num_files", std::make_shared()}, - {"total_size", std::make_shared()}, - {"num_entries", std::make_shared()}, - {"uncompressed_size", std::make_shared()}, - 
{"compressed_size", std::make_shared()}, - {"files_read", std::make_shared()}, - {"bytes_read", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + return ColumnsDescription + { + {"id", std::make_shared(), "Operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID."}, + {"name", std::make_shared(), "Operation name, a string like `Disk('backups', 'my_backup')`"}, + {"base_backup_name", std::make_shared(), "Base Backup Operation name, a string like `Disk('backups', 'my_base_backup')`"}, + {"status", std::make_shared(getBackupStatusEnumValues()), "Status of backup or restore operation."}, + {"error", std::make_shared(), "The error message if any."}, + {"start_time", std::make_shared(), "The time when operation started."}, + {"end_time", std::make_shared(), "The time when operation finished."}, + {"num_files", std::make_shared(), "The number of files stored in the backup."}, + {"total_size", std::make_shared(), "The total size of files stored in the backup."}, + {"num_entries", std::make_shared(), "The number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder."}, + {"uncompressed_size", std::make_shared(), "The uncompressed size of the backup."}, + {"compressed_size", std::make_shared(), "The compressed size of the backup."}, + {"files_read", std::make_shared(), "Returns the number of files read during RESTORE from this backup."}, + {"bytes_read", std::make_shared(), "Returns the total size of files read during RESTORE from this backup."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "All the profile events captured during this operation."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemBackups.h b/src/Storages/System/StorageSystemBackups.h index f23cd1a223a..a081bd52b4c 100644 --- a/src/Storages/System/StorageSystemBackups.h +++ b/src/Storages/System/StorageSystemBackups.h @@ -11,7 +11,7 @@ class StorageSystemBackups final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Name of the build option."}, + {"value", std::make_shared(), "Value of the build option."}, }; } diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 796b134ba56..a81bcb08bfc 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -63,6 +63,7 @@ const char * auto_config_build[] "USE_ORC", "@USE_ORC@", "USE_MSGPACK", "@USE_MSGPACK@", "USE_QPL", "@ENABLE_QPL@", + "USE_QAT", "@ENABLE_QATLIB@", "GIT_HASH", "@GIT_HASH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", "GIT_DATE", "@GIT_DATE@", diff --git a/src/Storages/System/StorageSystemBuildOptions.h b/src/Storages/System/StorageSystemBuildOptions.h index 0fe891c11e3..7c0bbf6b5fd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.h +++ b/src/Storages/System/StorageSystemBuildOptions.h @@ -22,7 +22,7 @@ public: std::string getName() const override { return "SystemBuildOptions"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index 7919fe81d66..c372e4b32b0 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ 
b/src/Storages/System/StorageSystemCertificates.cpp @@ -1,4 +1,5 @@ #include "config.h" +#include #include #include #include @@ -13,21 +14,13 @@ #include "Poco/Crypto/X509Certificate.h" #endif -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - namespace DB { -NamesAndTypesList StorageSystemCertificates::getNamesAndTypes() +ColumnsDescription StorageSystemCertificates::getColumnsDescription() { - return + /// TODO: Fill in all the comments. + return ColumnsDescription { {"version", std::make_shared>()}, {"serial_number", std::make_shared(std::make_shared())}, diff --git a/src/Storages/System/StorageSystemCertificates.h b/src/Storages/System/StorageSystemCertificates.h index 4df0010d0b4..f8c8477c998 100644 --- a/src/Storages/System/StorageSystemCertificates.h +++ b/src/Storages/System/StorageSystemCertificates.h @@ -18,7 +18,7 @@ class StorageSystemCertificates final : public IStorageSystemOneBlock()}, - {"shard_num", std::make_shared()}, - {"shard_weight", std::make_shared()}, - {"internal_replication", std::make_shared()}, - {"replica_num", std::make_shared()}, - {"host_name", std::make_shared()}, - {"host_address", std::make_shared()}, - {"port", std::make_shared()}, - {"is_local", std::make_shared()}, - {"user", std::make_shared()}, - {"default_database", std::make_shared()}, - {"errors_count", std::make_shared()}, - {"slowdowns_count", std::make_shared()}, - {"estimated_recovery_time", std::make_shared()}, - {"database_shard_name", std::make_shared()}, - {"database_replica_name", std::make_shared()}, - {"is_active", std::make_shared(std::make_shared())}, + {"cluster", std::make_shared(), "The cluster name."}, + {"shard_num", std::make_shared(), "The shard number in the cluster, starting from 1."}, + {"shard_weight", std::make_shared(), "The relative weight of the shard when writing data."}, + {"internal_replication", std::make_shared(), "Flag that indicates whether this host is a part on ensemble which can replicate the data on its own."}, + {"replica_num", std::make_shared(), "The replica number in the shard, starting from 1."}, + {"host_name", std::make_shared(), "The host name, as specified in the config."}, + {"host_address", std::make_shared(), "The host IP address obtained from DNS."}, + {"port", std::make_shared(), "The port to use for connecting to the server."}, + {"is_local", std::make_shared(), "Flag that indicates whether the host is local."}, + {"user", std::make_shared(), "The name of the user for connecting to the server."}, + {"default_database", std::make_shared(), "The default database name."}, + {"errors_count", std::make_shared(), "The number of times this host failed to reach replica."}, + {"slowdowns_count", std::make_shared(), "The number of slowdowns that led to changing replica when establishing a connection with hedged requests."}, + {"estimated_recovery_time", std::make_shared(), "Seconds remaining until the replica error count is zeroed and it is considered to be back to normal."}, + {"database_shard_name", std::make_shared(), "The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database)."}, + {"database_replica_name", std::make_shared(), "The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database)."}, + {"is_active", std::make_shared(std::make_shared()), "The status of the Replicated database replica (for clusters that belong to a 
Replicated database): 1 means 'replica is online', 0 means 'replica is offline', NULL means 'unknown'."}, }; -} -NamesAndAliases StorageSystemClusters::getNamesAndAliases() -{ - return { + description.setAliases({ {"name", std::make_shared(), "cluster"}, - }; + }); + + return description; } void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 071ad423b89..7b568641cb2 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -20,9 +20,7 @@ class StorageSystemClusters final : public IStorageSystemOneBlock()}, - {"language", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "Name of the collation."}, + {"language", std::make_shared(std::make_shared()), "The language."}, }; } diff --git a/src/Storages/System/StorageSystemCollations.h b/src/Storages/System/StorageSystemCollations.h index 8440cfa3ebc..1fc0ff0e024 100644 --- a/src/Storages/System/StorageSystemCollations.h +++ b/src/Storages/System/StorageSystemCollations.h @@ -15,7 +15,7 @@ protected: public: std::string getName() const override { return "SystemTableCollations"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemContributors.cpp b/src/Storages/System/StorageSystemContributors.cpp index ed28be2a4ab..860a96c4388 100644 --- a/src/Storages/System/StorageSystemContributors.cpp +++ b/src/Storages/System/StorageSystemContributors.cpp @@ -9,10 +9,11 @@ extern const char * auto_contributors[]; namespace DB { -NamesAndTypesList StorageSystemContributors::getNamesAndTypes() +ColumnsDescription StorageSystemContributors::getColumnsDescription() { - return { - {"name", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Contributor (author) name from git log."}, }; } diff --git a/src/Storages/System/StorageSystemContributors.h b/src/Storages/System/StorageSystemContributors.h index ba8c930118e..ed983c5e61f 100644 --- a/src/Storages/System/StorageSystemContributors.h +++ b/src/Storages/System/StorageSystemContributors.h @@ -22,6 +22,6 @@ public: return "SystemContributors"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemCurrentRoles.cpp b/src/Storages/System/StorageSystemCurrentRoles.cpp index cf7df0b8b99..88bdf088175 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.cpp +++ b/src/Storages/System/StorageSystemCurrentRoles.cpp @@ -11,14 +11,14 @@ namespace DB { -NamesAndTypesList StorageSystemCurrentRoles::getNamesAndTypes() +ColumnsDescription StorageSystemCurrentRoles::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"role_name", std::make_shared()}, - {"with_admin_option", std::make_shared()}, - {"is_default", std::make_shared()}, + return ColumnsDescription + { + {"role_name", std::make_shared(), "Role name."}, + {"with_admin_option", std::make_shared(), "1 if the role has ADMIN OPTION privilege."}, + {"is_default", std::make_shared(), "1 if role is set to be a default."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemCurrentRoles.h b/src/Storages/System/StorageSystemCurrentRoles.h index bf62b81d422..4cc9b11d3f4 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.h +++ 
b/src/Storages/System/StorageSystemCurrentRoles.h @@ -12,7 +12,7 @@ class StorageSystemCurrentRoles final : public IStorageSystemOneBlock> getStatusEnumsAndValues() }; } -NamesAndTypesList StorageSystemDDLWorkerQueue::getNamesAndTypes() +ColumnsDescription StorageSystemDDLWorkerQueue::getColumnsDescription() { - return { - {"entry", std::make_shared()}, - {"entry_version", std::make_shared(std::make_shared())}, - {"initiator_host", std::make_shared(std::make_shared())}, - {"initiator_port", std::make_shared(std::make_shared())}, - {"cluster", std::make_shared()}, - {"query", std::make_shared()}, - {"settings", std::make_shared(std::make_shared(), std::make_shared())}, - {"query_create_time", std::make_shared()}, + return ColumnsDescription + { + {"entry", std::make_shared(), "Query id."}, + {"entry_version", std::make_shared(std::make_shared()), "Version of the entry."}, + {"initiator_host", std::make_shared(std::make_shared()), "Host that initiated the DDL operation."}, + {"initiator_port", std::make_shared(std::make_shared()), "Port used by the initiator."}, + {"cluster", std::make_shared(), "Cluster name."}, + {"query", std::make_shared(), "Query executed."}, + {"settings", std::make_shared(std::make_shared(), std::make_shared()), "Settings used in the DDL operation."}, + {"query_create_time", std::make_shared(), "Query created time."}, - {"host", std::make_shared(std::make_shared())}, - {"port", std::make_shared(std::make_shared())}, - {"status", std::make_shared(std::make_shared(getStatusEnumsAndValues()))}, - {"exception_code", std::make_shared(std::make_shared())}, - {"exception_text", std::make_shared(std::make_shared())}, - {"query_finish_time", std::make_shared(std::make_shared())}, - {"query_duration_ms", std::make_shared(std::make_shared())}, + {"host", std::make_shared(std::make_shared()), "Hostname."}, + {"port", std::make_shared(std::make_shared()), "Host Port."}, + {"status", std::make_shared(std::make_shared(getStatusEnumsAndValues())), "Status of the query."}, + {"exception_code", std::make_shared(std::make_shared()), "Exception code."}, + {"exception_text", std::make_shared(std::make_shared()), "Exception message."}, + {"query_finish_time", std::make_shared(std::make_shared()), "Query finish time."}, + {"query_duration_ms", std::make_shared(std::make_shared()), "Duration of query execution (in milliseconds)."}, }; } diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.h b/src/Storages/System/StorageSystemDDLWorkerQueue.h index 797aff5cf98..871bb706f94 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.h +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.h @@ -21,6 +21,6 @@ protected: public: std::string getName() const override { return "SystemDDLWorkerQueue"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 3e22a6c0664..7e545757129 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -5,12 +5,13 @@ namespace DB { -NamesAndTypesList StorageSystemDashboards::getNamesAndTypes() +ColumnsDescription StorageSystemDashboards::getColumnsDescription() { - return { - {"dashboard", std::make_shared()}, - {"title", std::make_shared()}, - {"query", std::make_shared()}, + return ColumnsDescription + { + {"dashboard", std::make_shared(), "The dashboard name."}, + {"title", std::make_shared(), "The title of a chart."}, + 
{"query", std::make_shared(), "The query to obtain data to be displayed."}, }; } diff --git a/src/Storages/System/StorageSystemDashboards.h b/src/Storages/System/StorageSystemDashboards.h index cbd7c5bbf57..83a8664ad27 100644 --- a/src/Storages/System/StorageSystemDashboards.h +++ b/src/Storages/System/StorageSystemDashboards.h @@ -17,7 +17,7 @@ class StorageSystemDashboards final : public IStorageSystemOneBlock()}, - {"case_insensitive", std::make_shared()}, - {"alias_to", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Data type name."}, + {"case_insensitive", std::make_shared(), "Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid."}, + {"alias_to", std::make_shared(), "Data type name for which `name` is an alias."}, }; } diff --git a/src/Storages/System/StorageSystemDataTypeFamilies.h b/src/Storages/System/StorageSystemDataTypeFamilies.h index 25e20d2cd76..2cb834f6931 100644 --- a/src/Storages/System/StorageSystemDataTypeFamilies.h +++ b/src/Storages/System/StorageSystemDataTypeFamilies.h @@ -15,7 +15,7 @@ protected: public: std::string getName() const override { return "SystemTableDataTypeFamilies"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemDatabaseEngines.cpp b/src/Storages/System/StorageSystemDatabaseEngines.cpp index fb878a0eda0..514ca6d0ab3 100644 --- a/src/Storages/System/StorageSystemDatabaseEngines.cpp +++ b/src/Storages/System/StorageSystemDatabaseEngines.cpp @@ -6,10 +6,11 @@ namespace DB { -NamesAndTypesList StorageSystemDatabaseEngines::getNamesAndTypes() +ColumnsDescription StorageSystemDatabaseEngines::getColumnsDescription() { - return { - {"name", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "The name of database engine."}, }; } diff --git a/src/Storages/System/StorageSystemDatabaseEngines.h b/src/Storages/System/StorageSystemDatabaseEngines.h index cd0ee541633..16b517c91e6 100644 --- a/src/Storages/System/StorageSystemDatabaseEngines.h +++ b/src/Storages/System/StorageSystemDatabaseEngines.h @@ -16,7 +16,7 @@ protected: public: std::string getName() const override { return "SystemDatabaseEngines"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 6dbe780193d..0ffed6c9771 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -15,24 +15,24 @@ namespace DB { -NamesAndTypesList StorageSystemDatabases::getNamesAndTypes() +ColumnsDescription StorageSystemDatabases::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"engine", std::make_shared()}, - {"data_path", std::make_shared()}, - {"metadata_path", std::make_shared()}, - {"uuid", std::make_shared()}, - {"engine_full", std::make_shared()}, - {"comment", std::make_shared()} + auto description = ColumnsDescription + { + {"name", std::make_shared(), "Database name."}, + {"engine", std::make_shared(), "Database engine."}, + {"data_path", std::make_shared(), "Data path."}, + {"metadata_path", std::make_shared(), "Metadata path."}, + {"uuid", std::make_shared(), "Database UUID."}, + {"engine_full", std::make_shared(), "Parameters of the database engine."}, + {"comment", std::make_shared(), "Database 
comment."} }; -} -NamesAndAliases StorageSystemDatabases::getNamesAndAliases() -{ - return { + description.setAliases({ {"database", std::make_shared(), "name"} - }; + }); + + return description; } static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database) diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 29dd786ca0a..2fd9ccdc970 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -19,9 +19,7 @@ public: return "SystemDatabases"; } - static NamesAndTypesList getNamesAndTypes(); - - static NamesAndAliases getNamesAndAliases(); + static ColumnsDescription getColumnsDescription(); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index 3299365b4ac..c2ed35c5510 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -52,34 +52,43 @@ catch (const DB::Exception &) } -NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() +ColumnsDescription StorageSystemDictionaries::getColumnsDescription() { - return { - {"database", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"status", std::make_shared(getStatusEnumAllPossibleValues())}, - {"origin", std::make_shared()}, - {"type", std::make_shared()}, - {"key.names", std::make_shared(std::make_shared())}, - {"key.types", std::make_shared(std::make_shared())}, - {"attribute.names", std::make_shared(std::make_shared())}, - {"attribute.types", std::make_shared(std::make_shared())}, - {"bytes_allocated", std::make_shared()}, - {"hierarchical_index_bytes_allocated", std::make_shared()}, - {"query_count", std::make_shared()}, - {"hit_rate", std::make_shared()}, - {"found_rate", std::make_shared()}, - {"element_count", std::make_shared()}, - {"load_factor", std::make_shared()}, - {"source", std::make_shared()}, - {"lifetime_min", std::make_shared()}, - {"lifetime_max", std::make_shared()}, - {"loading_start_time", std::make_shared()}, - {"last_successful_update_time", std::make_shared()}, - {"loading_duration", std::make_shared()}, - {"last_exception", std::make_shared()}, - {"comment", std::make_shared()} + return ColumnsDescription + { + {"database", std::make_shared(), "Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries."}, + {"name", std::make_shared(), "Dictionary name."}, + {"uuid", std::make_shared(), "Dictionary UUID."}, + {"status", std::make_shared(getStatusEnumAllPossibleValues()), + "Dictionary status. Possible values: " + "NOT_LOADED — Dictionary was not loaded because it was not used, " + "LOADED — Dictionary loaded successfully, " + "FAILED — Unable to load the dictionary as a result of an error, " + "LOADING — Dictionary is loading now, " + "LOADED_AND_RELOADING — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: SYSTEM RELOAD DICTIONARY query, timeout, dictionary config has changed), " + "FAILED_AND_RELOADING — Could not load the dictionary as a result of an error and is loading now." + }, + {"origin", std::make_shared(), "Path to the configuration file that describes the dictionary."}, + {"type", std::make_shared(), "Type of a dictionary allocation. 
Storing Dictionaries in Memory."}, + {"key.names", std::make_shared(std::make_shared()), "Array of key names provided by the dictionary."}, + {"key.types", std::make_shared(std::make_shared()), "Corresponding array of key types provided by the dictionary."}, + {"attribute.names", std::make_shared(std::make_shared()), "Array of attribute names provided by the dictionary."}, + {"attribute.types", std::make_shared(std::make_shared()), "Corresponding array of attribute types provided by the dictionary."}, + {"bytes_allocated", std::make_shared(), "Amount of RAM allocated for the dictionary."}, + {"hierarchical_index_bytes_allocated", std::make_shared(), ""}, + {"query_count", std::make_shared(), "Number of queries since the dictionary was loaded or since the last successful reboot."}, + {"hit_rate", std::make_shared(), "For cache dictionaries, the percentage of uses for which the value was in the cache."}, + {"found_rate", std::make_shared(), "The percentage of uses for which the value was found."}, + {"element_count", std::make_shared(), "Number of items stored in the dictionary."}, + {"load_factor", std::make_shared(), "Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table)."}, + {"source", std::make_shared(), "Text describing the data source for the dictionary."}, + {"lifetime_min", std::make_shared(), "Minimum lifetime of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if invalidate_query is set, then only if it has changed). Set in seconds."}, + {"lifetime_max", std::make_shared(), "Maximum lifetime of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if invalidate_query is set, then only if it has changed). Set in seconds."}, + {"loading_start_time", std::make_shared(), "Start time for loading the dictionary."}, + {"last_successful_update_time", std::make_shared(), "End time for loading or updating the dictionary. 
Helps to monitor some troubles with dictionary sources and investigate the causes."}, + {"loading_duration", std::make_shared(), "Duration of a dictionary loading."}, + {"last_exception", std::make_shared(), "Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created."}, + {"comment", std::make_shared(), "Text of the comment to dictionary."} }; } diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index 6d4234362e5..792b3c0dd30 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -14,7 +14,7 @@ class StorageSystemDictionaries final : public IStorageSystemOneBlock() }, - { "table", std::make_shared() }, - { "data_path", std::make_shared() }, - { "is_blocked", std::make_shared() }, - { "error_count", std::make_shared() }, - { "data_files", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "broken_data_files", std::make_shared() }, - { "broken_data_compressed_bytes", std::make_shared() }, - { "last_exception", std::make_shared() }, - { "last_exception_time", std::make_shared() }, + return ColumnsDescription + { + { "database", std::make_shared(), "Name of the database."}, + { "table", std::make_shared(), "Name of the table."}, + { "data_path", std::make_shared(), "Path to the folder with local files."}, + { "is_blocked", std::make_shared(), "Flag indicates whether sending local files to the server is blocked."}, + { "error_count", std::make_shared(), "Number of errors."}, + { "data_files", std::make_shared(), "Number of local files in a folder."}, + { "data_compressed_bytes", std::make_shared(), "Size of compressed data in local files, in bytes."}, + { "broken_data_files", std::make_shared(), "Number of files that has been marked as broken (due to an error)."}, + { "broken_data_compressed_bytes", std::make_shared(), "Size of compressed data in broken files, in bytes."}, + { "last_exception", std::make_shared(), "Text message about the last error that occurred (if any)."}, + { "last_exception_time", std::make_shared(), "Time when last exception occurred."}, }; } diff --git a/src/Storages/System/StorageSystemDistributionQueue.h b/src/Storages/System/StorageSystemDistributionQueue.h index 63c5d40f07b..477a9d6e245 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.h +++ b/src/Storages/System/StorageSystemDistributionQueue.h @@ -16,7 +16,7 @@ class StorageSystemDistributionQueue final : public IStorageSystemOneBlock()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"uuid", std::make_shared()}, - {"engine", std::make_shared()}, - {"metadata_dropped_path", std::make_shared()}, - {"table_dropped_time", std::make_shared()}, + return ColumnsDescription + { + {"index", std::make_shared(), "Index in marked_dropped_tables queue."}, + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Table name."}, + {"uuid", std::make_shared(), "Table UUID."}, + {"engine", std::make_shared(), "Table engine name."}, + {"metadata_dropped_path", std::make_shared(), "Path of table's metadata file in metadata_dropped directory."}, + {"table_dropped_time", std::make_shared(), "The time when the next attempt to remove table's data is scheduled on. 
Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemDroppedTables.h b/src/Storages/System/StorageSystemDroppedTables.h index 44cc8122603..d7c3569eb62 100644 --- a/src/Storages/System/StorageSystemDroppedTables.h +++ b/src/Storages/System/StorageSystemDroppedTables.h @@ -10,7 +10,7 @@ class StorageSystemDroppedTables final : public IStorageSystemOneBlock +#include +#include +#include +#include +#include + + +namespace DB +{ + + +StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context) + : StoragesInfoStreamBase(context) +{ + /// Will apply WHERE to subset of columns and then add more columns. + /// This is kind of complicated, but we use WHERE to do less work. + + Block block_to_filter; + + MutableColumnPtr database_column_mut = ColumnString::create(); + MutableColumnPtr table_column_mut = ColumnString::create(); + MutableColumnPtr engine_column_mut = ColumnString::create(); + MutableColumnPtr active_column_mut = ColumnUInt8::create(); + MutableColumnPtr storage_uuid_column_mut = ColumnUUID::create(); + + const auto access = context->getAccess(); + const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); + + auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); + for (const auto & dropped_table : tables_mark_dropped) + { + StoragePtr storage = dropped_table.table; + if (!storage) + continue; + + UUID storage_uuid = storage->getStorageID().uuid; + String database_name = storage->getStorageID().getDatabaseName(); + String table_name = storage->getStorageID().getTableName(); + String engine_name = storage->getName(); +#if USE_MYSQL + if (auto * proxy = dynamic_cast(storage.get())) + { + auto nested = proxy->getNested(); + storage.swap(nested); + } +#endif + if (!dynamic_cast(storage.get())) + continue; + + if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + storages[storage_uuid] = storage; + + /// Add all combinations of flag 'active'. + for (UInt64 active : {0, 1}) + { + database_column_mut->insert(database_name); + table_column_mut->insert(table_name); + engine_column_mut->insert(engine_name); + active_column_mut->insert(active); + storage_uuid_column_mut->insert(storage_uuid); + } + } + + block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared(), "table")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(engine_column_mut), std::make_shared(), "engine")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(active_column_mut), std::make_shared(), "active")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(storage_uuid_column_mut), std::make_shared(), "uuid")); + + if (block_to_filter.rows()) + { + /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. 
+ VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + rows = block_to_filter.rows(); + } + + database_column = block_to_filter.getByName("database").column; + table_column = block_to_filter.getByName("table").column; + active_column = block_to_filter.getByName("active").column; + storage_uuid_column = block_to_filter.getByName("uuid").column; +} + + +} diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h new file mode 100644 index 00000000000..f548697a6a9 --- /dev/null +++ b/src/Storages/System/StorageSystemDroppedTablesParts.h @@ -0,0 +1,39 @@ +#pragma once + +#include + + +namespace DB +{ + +class StoragesDroppedInfoStream : public StoragesInfoStreamBase +{ +public: + StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context); +protected: + bool tryLockTable(StoragesInfo &) override + { + // we don't need to lock a dropped table + return true; + } +}; + +class Context; + + +/** Implements system table 'dropped_tables_parts' which allows to get information about data parts for dropped but not yet removed tables. + */ +class StorageSystemDroppedTablesParts final : public StorageSystemParts +{ +public: + explicit StorageSystemDroppedTablesParts(const StorageID & table_id) : StorageSystemParts(table_id) {} + + std::string getName() const override { return "SystemDroppedTablesParts"; } +protected: + std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) override + { + return std::make_unique(query_info, context); + } +}; + +} diff --git a/src/Storages/System/StorageSystemEnabledRoles.cpp b/src/Storages/System/StorageSystemEnabledRoles.cpp index eec2f24c5b2..6dbb6f18488 100644 --- a/src/Storages/System/StorageSystemEnabledRoles.cpp +++ b/src/Storages/System/StorageSystemEnabledRoles.cpp @@ -11,15 +11,15 @@ namespace DB { -NamesAndTypesList StorageSystemEnabledRoles::getNamesAndTypes() +ColumnsDescription StorageSystemEnabledRoles::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"role_name", std::make_shared()}, - {"with_admin_option", std::make_shared()}, - {"is_current", std::make_shared()}, - {"is_default", std::make_shared()}, + return ColumnsDescription + { + {"role_name", std::make_shared(), "Role name."}, + {"with_admin_option", std::make_shared(), "1 if the role has ADMIN OPTION privilege."}, + {"is_current", std::make_shared(), "Flag that shows whether `enabled_role` is a current role of a current user."}, + {"is_default", std::make_shared(), "Flag that shows whether `enabled_role` is a default role."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemEnabledRoles.h b/src/Storages/System/StorageSystemEnabledRoles.h index 93e8e0b5311..5367b2ccbea 100644 --- a/src/Storages/System/StorageSystemEnabledRoles.h +++ b/src/Storages/System/StorageSystemEnabledRoles.h @@ -12,7 +12,7 @@ class StorageSystemEnabledRoles final : public IStorageSystemOneBlock() }, - { "code", std::make_shared() }, - { "value", std::make_shared() }, - { "last_error_time", std::make_shared() }, - { "last_error_message", std::make_shared() }, - { "last_error_trace", std::make_shared(std::make_shared()) }, - { "remote", std::make_shared() }, + return ColumnsDescription + { + { "name", std::make_shared(), "Name of the error (errorCodeToName)."}, + { "code", std::make_shared(), "Code number of the error."}, + { "value", std::make_shared(), "The number of times this error happened."}, + { "last_error_time", 
std::make_shared(), "The time when the last error happened."}, + { "last_error_message", std::make_shared(), "Message for the last error."}, + { "last_error_trace", std::make_shared(std::make_shared()), "A stack trace that represents a list of physical addresses where the called methods are stored."}, + { "remote", std::make_shared(), "Remote exception (i.e. received during one of the distributed queries)."}, }; } diff --git a/src/Storages/System/StorageSystemErrors.h b/src/Storages/System/StorageSystemErrors.h index f44ae9c8025..9e8ec628bac 100644 --- a/src/Storages/System/StorageSystemErrors.h +++ b/src/Storages/System/StorageSystemErrors.h @@ -18,7 +18,7 @@ class StorageSystemErrors final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, - {"description", std::make_shared()}, + auto description = ColumnsDescription + { + {"event", std::make_shared(), "Event name."}, + {"value", std::make_shared(), "Number of events occurred."}, + {"description", std::make_shared(), "Event description."}, }; -} -NamesAndAliases StorageSystemEvents::getNamesAndAliases() -{ - return { + description.setAliases({ {"name", std::make_shared(), "event"} - }; + }); + + return description; } void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemEvents.h b/src/Storages/System/StorageSystemEvents.h index b2e4bac072a..cbd92f90d7a 100644 --- a/src/Storages/System/StorageSystemEvents.h +++ b/src/Storages/System/StorageSystemEvents.h @@ -15,9 +15,7 @@ class StorageSystemEvents final : public IStorageSystemOneBlock()}, {"cache_base_path", std::make_shared()}, {"cache_path", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h index cc5c8d12f79..4b13b375f95 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -7,7 +7,7 @@ namespace DB { /** - * Usgae example. How to get mapping from local paths to remote paths: + * Usage example. 
How to get mapping from local paths to remote paths: * SELECT * cache_path, * cache_hits, @@ -36,7 +36,7 @@ public: std::string getName() const override { return "SystemFilesystemCache"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index daac1e60057..a360971e1f7 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -6,14 +6,15 @@ namespace DB { -NamesAndTypesList StorageSystemFormats::getNamesAndTypes() +ColumnsDescription StorageSystemFormats::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"is_input", std::make_shared()}, - {"is_output", std::make_shared()}, - {"supports_parallel_parsing", std::make_shared()}, - {"supports_parallel_formatting", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Format name."}, + {"is_input", std::make_shared(), "Flag that indicates whether the format is suitable for data input."}, + {"is_output", std::make_shared(), "Flag that indicates whether the format is suitable for data output."}, + {"supports_parallel_parsing", std::make_shared(), "Flag that indicates whether the format supports parallel parsing."}, + {"supports_parallel_formatting", std::make_shared(), "Flag that indicates whether the format supports parallel formatting."}, }; } diff --git a/src/Storages/System/StorageSystemFormats.h b/src/Storages/System/StorageSystemFormats.h index d7631066020..9f9d1df1bde 100644 --- a/src/Storages/System/StorageSystemFormats.h +++ b/src/Storages/System/StorageSystemFormats.h @@ -17,6 +17,6 @@ public: return "SystemFormats"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 9809b9435f2..45c00e6de27 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -113,22 +113,23 @@ std::vector> getOriginEnumsAndValues() }; } -NamesAndTypesList StorageSystemFunctions::getNamesAndTypes() +ColumnsDescription StorageSystemFunctions::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"is_aggregate", std::make_shared()}, - {"is_deterministic", std::make_shared(std::make_shared())}, - {"case_insensitive", std::make_shared()}, - {"alias_to", std::make_shared()}, - {"create_query", std::make_shared()}, - {"origin", std::make_shared(getOriginEnumsAndValues())}, - {"description", std::make_shared()}, - {"syntax", std::make_shared()}, - {"arguments", std::make_shared()}, - {"returned_value", std::make_shared()}, - {"examples", std::make_shared()}, - {"categories", std::make_shared()} + return ColumnsDescription + { + {"name", std::make_shared(), "The name of the function."}, + {"is_aggregate", std::make_shared(), "Whether the function is an aggregate function."}, + {"is_deterministic", std::make_shared(std::make_shared()), "Whether the function is deterministic."}, + {"case_insensitive", std::make_shared(), "Whether the function name can be used case-insensitively."}, + {"alias_to", std::make_shared(), "The original function name, if the function name is an alias."}, + {"create_query", std::make_shared(), "Obsolete."}, + {"origin", 
std::make_shared(getOriginEnumsAndValues()), "Obsolete."}, + {"description", std::make_shared(), "A high-level description what the function does."}, + {"syntax", std::make_shared(), "Signature of the function."}, + {"arguments", std::make_shared(), "What arguments does the function take."}, + {"returned_value", std::make_shared(), "What does the function return."}, + {"examples", std::make_shared(), "Usage example."}, + {"categories", std::make_shared(), "The category of the function."} }; } diff --git a/src/Storages/System/StorageSystemFunctions.h b/src/Storages/System/StorageSystemFunctions.h index 606694a4c0b..ac1129e8127 100644 --- a/src/Storages/System/StorageSystemFunctions.h +++ b/src/Storages/System/StorageSystemFunctions.h @@ -17,7 +17,7 @@ class StorageSystemFunctions final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp index 461efd7f640..f5f3fa07e53 100644 --- a/src/Storages/System/StorageSystemGrants.cpp +++ b/src/Storages/System/StorageSystemGrants.cpp @@ -18,19 +18,23 @@ namespace DB { -NamesAndTypesList StorageSystemGrants::getNamesAndTypes() +ColumnsDescription StorageSystemGrants::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"user_name", std::make_shared(std::make_shared())}, - {"role_name", std::make_shared(std::make_shared())}, - {"access_type", std::make_shared(StorageSystemPrivileges::getAccessTypeEnumValues())}, - {"database", std::make_shared(std::make_shared())}, - {"table", std::make_shared(std::make_shared())}, - {"column", std::make_shared(std::make_shared())}, - {"is_partial_revoke", std::make_shared()}, - {"grant_option", std::make_shared()}, + return ColumnsDescription + { + {"user_name", std::make_shared(std::make_shared()), "User name."}, + {"role_name", std::make_shared(std::make_shared()), "Role assigned to user account."}, + {"access_type", std::make_shared(StorageSystemPrivileges::getAccessTypeEnumValues()), "Access parameters for ClickHouse user account."}, + {"database", std::make_shared(std::make_shared()), "Name of a database."}, + {"table", std::make_shared(std::make_shared()), "Name of a table."}, + {"column", std::make_shared(std::make_shared()), "Name of a column to which access is granted."}, + {"is_partial_revoke", std::make_shared(), + "Logical value. It shows whether some privileges have been revoked. Possible values: " + "0 — The row describes a partial revoke, " + "1 — The row describes a grant." 
+ }, + {"grant_option", std::make_shared(), "Permission is granted WITH GRANT OPTION."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemGrants.h b/src/Storages/System/StorageSystemGrants.h index 48d95f487a1..2202b52ad5f 100644 --- a/src/Storages/System/StorageSystemGrants.h +++ b/src/Storages/System/StorageSystemGrants.h @@ -12,7 +12,7 @@ class StorageSystemGrants final : public IStorageSystemOneBlock()}, - {"rule_type", std::make_shared()}, - {"regexp", std::make_shared()}, - {"function", std::make_shared()}, - {"age", std::make_shared()}, - {"precision", std::make_shared()}, - {"priority", std::make_shared()}, - {"is_default", std::make_shared()}, - {"Tables.database", std::make_shared(std::make_shared())}, - {"Tables.table", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"config_name", std::make_shared(), "graphite_rollup parameter name."}, + {"rule_type", std::make_shared(), ""}, + {"regexp", std::make_shared(), "A pattern for the metric name."}, + {"function", std::make_shared(), "The name of the aggregating function."}, + {"age", std::make_shared(), "The minimum age of the data in seconds."}, + {"precision", std::make_shared(), "How precisely to define the age of the data in seconds."}, + {"priority", std::make_shared(), "Pattern priority."}, + {"is_default", std::make_shared(), "Whether the pattern is the default."}, + {"Tables.database", std::make_shared(std::make_shared()), "Array of names of database tables that use the `config_name` parameter."}, + {"Tables.table", std::make_shared(std::make_shared()), "Array of table names that use the `config_name` parameter."}, }; } diff --git a/src/Storages/System/StorageSystemGraphite.h b/src/Storages/System/StorageSystemGraphite.h index 608dac79133..be101181cf7 100644 --- a/src/Storages/System/StorageSystemGraphite.h +++ b/src/Storages/System/StorageSystemGraphite.h @@ -15,7 +15,7 @@ class StorageSystemGraphite final : public IStorageSystemOneBlock() }, - { "large", std::make_shared() }, - { "size", std::make_shared() }, - { "allocations", std::make_shared() }, - { "deallocations", std::make_shared() }, + return ColumnsDescription + { + { "index", std::make_shared(), "Index of the bin ordered by size."}, + { "large", std::make_shared(), "True for large allocations and False for small."}, + { "size", std::make_shared(), "Size of allocations in this bin."}, + { "allocations", std::make_shared(), "Number of allocations."}, + { "deallocations", std::make_shared(), "Number of deallocations."}, }; } diff --git a/src/Storages/System/StorageSystemJemalloc.h b/src/Storages/System/StorageSystemJemalloc.h index a4ac2fbcdcb..0cd29d99131 100644 --- a/src/Storages/System/StorageSystemJemalloc.h +++ b/src/Storages/System/StorageSystemJemalloc.h @@ -15,7 +15,7 @@ public: std::string getName() const override { return "SystemJemallocBins"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); Pipe read( const Names & column_names, diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index e333f6e2c15..95962d8de8b 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -22,29 +22,29 @@ namespace DB { -NamesAndTypesList StorageSystemKafkaConsumers::getNamesAndTypes() +ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"database", std::make_shared()}, - 
{"table", std::make_shared()}, - {"consumer_id", std::make_shared()}, //(number? or string? - single clickhouse table can have many consumers) - {"assignments.topic", std::make_shared(std::make_shared())}, - {"assignments.partition_id", std::make_shared(std::make_shared())}, - {"assignments.current_offset", std::make_shared(std::make_shared())}, - {"exceptions.time", std::make_shared(std::make_shared())}, - {"exceptions.text", std::make_shared(std::make_shared())}, - {"last_poll_time", std::make_shared()}, - {"num_messages_read", std::make_shared()}, - {"last_commit_time", std::make_shared()}, - {"num_commits", std::make_shared()}, - {"last_rebalance_time", std::make_shared()}, - {"num_rebalance_revocations", std::make_shared()}, - {"num_rebalance_assignments", std::make_shared()}, - {"is_currently_used", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "Database of the table with Kafka Engine."}, + {"table", std::make_shared(), "Name of the table with Kafka Engine."}, + {"consumer_id", std::make_shared(), "Kafka consumer identifier. Note that a table can have many consumers. Specified by the `kafka_num_consumers` parameter."}, + {"assignments.topic", std::make_shared(std::make_shared()), "Kafka topic."}, + {"assignments.partition_id", std::make_shared(std::make_shared()), "Kafka partition id. Note that only one consumer can be assigned to a partition."}, + {"assignments.current_offset", std::make_shared(std::make_shared()), "Current offset."}, + {"exceptions.time", std::make_shared(std::make_shared()), "Timestamps when the 10 most recent exceptions were generated."}, + {"exceptions.text", std::make_shared(std::make_shared()), "Text of the 10 most recent exceptions."}, + {"last_poll_time", std::make_shared(), "Timestamp of the most recent poll."}, + {"num_messages_read", std::make_shared(), "Number of messages read by the consumer."}, + {"last_commit_time", std::make_shared(), "Timestamp of the most recent commit."}, + {"num_commits", std::make_shared(), "Total number of commits for the consumer."}, + {"last_rebalance_time", std::make_shared(), "Timestamp of the most recent Kafka rebalance."}, + {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer's partitions were revoked."}, + {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to the Kafka cluster."}, + {"is_currently_used", std::make_shared(), "Whether the consumer is currently in use."}, {"last_used", std::make_shared(6)}, - {"rdkafka_stat", std::make_shared()}, + {"rdkafka_stat", std::make_shared(), "Library internal statistics. Set statistics_interval_ms to 0 to disable; the default is 3000 (once in three seconds)."}, }; - return names_and_types; } void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemKafkaConsumers.h b/src/Storages/System/StorageSystemKafkaConsumers.h index eda3a39bc7e..ae2c726849d 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.h +++ b/src/Storages/System/StorageSystemKafkaConsumers.h @@ -15,7 +15,7 @@ class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock()}, - {"license_type", std::make_shared()}, - {"license_path", std::make_shared()}, - {"license_text", std::make_shared()}, + return ColumnsDescription + { + {"library_name", std::make_shared(), "Name of the library."}, + {"license_type", std::make_shared(), "License type — e.g.
Apache, MIT."}, + {"license_path", std::make_shared(), "Path to the file with the license text."}, + {"license_text", std::make_shared(), "License text."}, }; } diff --git a/src/Storages/System/StorageSystemLicenses.h b/src/Storages/System/StorageSystemLicenses.h index 76320607805..57a3ff201a2 100644 --- a/src/Storages/System/StorageSystemLicenses.h +++ b/src/Storages/System/StorageSystemLicenses.h @@ -23,6 +23,6 @@ public: return "SystemLicenses"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemMacros.cpp b/src/Storages/System/StorageSystemMacros.cpp index 576fbc69039..6c1a24d152a 100644 --- a/src/Storages/System/StorageSystemMacros.cpp +++ b/src/Storages/System/StorageSystemMacros.cpp @@ -6,11 +6,12 @@ namespace DB { -NamesAndTypesList StorageSystemMacros::getNamesAndTypes() +ColumnsDescription StorageSystemMacros::getColumnsDescription() { - return { - {"macro", std::make_shared()}, - {"substitution", std::make_shared()}, + return ColumnsDescription + { + {"macro", std::make_shared(), "The macro name."}, + {"substitution", std::make_shared(), "The substitution string."}, }; } diff --git a/src/Storages/System/StorageSystemMacros.h b/src/Storages/System/StorageSystemMacros.h index 58c99f9efb8..ffbeb70796e 100644 --- a/src/Storages/System/StorageSystemMacros.h +++ b/src/Storages/System/StorageSystemMacros.h @@ -17,7 +17,7 @@ class StorageSystemMacros final : public IStorageSystemOneBlock -NamesAndTypesList SystemMergeTreeSettings::getNamesAndTypes() +ColumnsDescription SystemMergeTreeSettings::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"value", std::make_shared()}, - {"changed", std::make_shared()}, - {"description", std::make_shared()}, - {"min", std::make_shared(std::make_shared())}, - {"max", std::make_shared(std::make_shared())}, - {"readonly", std::make_shared()}, - {"type", std::make_shared()}, - {"is_obsolete", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Setting name."}, + {"value", std::make_shared(), "Setting value."}, + {"changed", std::make_shared(), "1 if the setting was explicitly defined in the config or explicitly changed."}, + {"description", std::make_shared(), "Setting description."}, + {"min", std::make_shared(std::make_shared()), "Minimum value of the setting, if any is set via constraints. If the setting has no minimum value, contains NULL."}, + {"max", std::make_shared(std::make_shared()), "Maximum value of the setting, if any is set via constraints. If the setting has no maximum value, contains NULL."}, + {"readonly", std::make_shared(), + "Shows whether the current user can change the setting: " + "0 — Current user can change the setting, " + "1 — Current user can't change the setting." 
+ }, + {"type", std::make_shared(), "Setting type (implementation specific string value)."}, + {"is_obsolete", std::make_shared(), "Shows whether a setting is obsolete."}, }; } diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.h b/src/Storages/System/StorageSystemMergeTreeSettings.h index f8a3c9ee422..48e83f0a880 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.h +++ b/src/Storages/System/StorageSystemMergeTreeSettings.h @@ -19,7 +19,7 @@ class SystemMergeTreeSettings final : public IStorageSystemOneBlock>::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 2dbe2964eb9..4129e4c235b 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -7,33 +7,34 @@ namespace DB { -NamesAndTypesList StorageSystemMerges::getNamesAndTypes() +ColumnsDescription StorageSystemMerges::getColumnsDescription() { - return { - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"progress", std::make_shared()}, - {"num_parts", std::make_shared()}, - {"source_part_names", std::make_shared(std::make_shared())}, - {"result_part_name", std::make_shared()}, - {"source_part_paths", std::make_shared(std::make_shared())}, - {"result_part_path", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "The name of the database the table is in."}, + {"table", std::make_shared(), "Table name."}, + {"elapsed", std::make_shared(), "The time elapsed (in seconds) since the merge started."}, + {"progress", std::make_shared(), "The percentage of completed work from 0 to 1."}, + {"num_parts", std::make_shared(), "The number of parts to be merged."}, + {"source_part_names", std::make_shared(std::make_shared()), ""}, + {"result_part_name", std::make_shared(), "The name of the part that will be formed as the result of merging."}, + {"source_part_paths", std::make_shared(std::make_shared()), ""}, + {"result_part_path", std::make_shared(), ""}, {"partition_id", std::make_shared()}, {"partition", std::make_shared()}, - {"is_mutation", std::make_shared()}, - {"total_size_bytes_compressed", std::make_shared()}, - {"total_size_bytes_uncompressed", std::make_shared()}, - {"total_size_marks", std::make_shared()}, - {"bytes_read_uncompressed", std::make_shared()}, - {"rows_read", std::make_shared()}, - {"bytes_written_uncompressed", std::make_shared()}, - {"rows_written", std::make_shared()}, - {"columns_written", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"thread_id", std::make_shared()}, - {"merge_type", std::make_shared()}, - {"merge_algorithm", std::make_shared()}, + {"is_mutation", std::make_shared(), "1 if this process is a part mutation."}, + {"total_size_bytes_compressed", std::make_shared(), "The total size of the compressed data in the merged chunks."}, + {"total_size_bytes_uncompressed", std::make_shared(), "The total size of the uncompressed data in the merged chunks."}, + {"total_size_marks", std::make_shared(), "The total number of marks in the merged parts."}, + {"bytes_read_uncompressed", std::make_shared(), "Number of bytes read, uncompressed."}, + {"rows_read", std::make_shared(), "Number of rows read."}, + {"bytes_written_uncompressed", std::make_shared(), "Number of bytes written, uncompressed."}, + {"rows_written", std::make_shared(), "Number of rows written."}, + {"columns_written", std::make_shared(), "Number of columns written (for Vertical merge algorithm)."},
+ {"memory_usage", std::make_shared(), "Memory consumption of the merge process."}, + {"thread_id", std::make_shared(), "Thread ID of the merge process."}, + {"merge_type", std::make_shared(), "The type of the current merge. Empty if it's a mutation."}, + {"merge_algorithm", std::make_shared(), "The algorithm used in the current merge. Empty if it's a mutation."}, }; } diff --git a/src/Storages/System/StorageSystemMerges.h b/src/Storages/System/StorageSystemMerges.h index d4a2b98d9ab..961d28daf9a 100644 --- a/src/Storages/System/StorageSystemMerges.h +++ b/src/Storages/System/StorageSystemMerges.h @@ -17,7 +17,7 @@ class StorageSystemMerges final : public IStorageSystemOneBlock #include #include #include @@ -8,20 +9,20 @@ namespace DB { -NamesAndTypesList StorageSystemMetrics::getNamesAndTypes() +ColumnsDescription StorageSystemMetrics::getColumnsDescription() { - return { - {"metric", std::make_shared()}, - {"value", std::make_shared()}, - {"description", std::make_shared()}, + auto description = ColumnsDescription + { + {"metric", std::make_shared(), "Metric name."}, + {"value", std::make_shared(), "Metric value."}, + {"description", std::make_shared(), "Metric description."}, }; -} -NamesAndAliases StorageSystemMetrics::getNamesAndAliases() -{ - return { + description.setAliases({ {"name", std::make_shared(), "metric"} - }; + }); + + return description; } void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemMetrics.h b/src/Storages/System/StorageSystemMetrics.h index e3e2c07014f..ec0c67cf6b7 100644 --- a/src/Storages/System/StorageSystemMetrics.h +++ b/src/Storages/System/StorageSystemMetrics.h @@ -16,9 +16,7 @@ class StorageSystemMetrics final : public IStorageSystemOneBlock() }, { "type", std::make_shared() }, { "loading_start_time", std::make_shared() }, diff --git a/src/Storages/System/StorageSystemModels.h b/src/Storages/System/StorageSystemModels.h index dfb6ad3de5a..91fa3761743 100644 --- a/src/Storages/System/StorageSystemModels.h +++ b/src/Storages/System/StorageSystemModels.h @@ -14,7 +14,7 @@ class StorageSystemModels final : public IStorageSystemOneBlock()}, - {"table", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"target_disk_name", std::make_shared()}, - {"target_disk_path", std::make_shared()}, - {"part_name", std::make_shared()}, - {"part_size", std::make_shared()}, - {"thread_id", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table containing the moving data part."}, + {"elapsed", std::make_shared(), "Time elapsed (in seconds) since data part movement started."}, + {"target_disk_name", std::make_shared(), "Name of the disk to which the data part is moving."}, + {"target_disk_path", std::make_shared(), "Path to the mount point of the disk in the file system."}, + {"part_name", std::make_shared(), "Name of the data part being moved."}, + {"part_size", std::make_shared(), "Data part size."}, + {"thread_id", std::make_shared(), "Identifier of a thread performing the movement."}, }; } diff --git a/src/Storages/System/StorageSystemMoves.h b/src/Storages/System/StorageSystemMoves.h index 2e4ceec2abd..acdd9642f8f 100644 --- a/src/Storages/System/StorageSystemMoves.h +++ b/src/Storages/System/StorageSystemMoves.h @@ -17,7 +17,7 @@ class StorageSystemMoves final : public IStorageSystemOneBlock() }, - { "table", std::make_shared() }, - { "mutation_id",
std::make_shared() }, - { "command", std::make_shared() }, - { "create_time", std::make_shared() }, - { "block_numbers.partition_id", std::make_shared(std::make_shared()) }, - { "block_numbers.number", std::make_shared(std::make_shared()) }, - { "parts_to_do_names", std::make_shared(std::make_shared()) }, - { "parts_to_do", std::make_shared() }, - { "is_done", std::make_shared() }, + return ColumnsDescription + { + { "database", std::make_shared(), "The name of the database to which the mutation was applied."}, + { "table", std::make_shared(), "The name of the table to which the mutation was applied."}, + { "mutation_id", std::make_shared(), "The ID of the mutation. For replicated tables these IDs correspond to znode names in the /mutations/ directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table."}, + { "command", std::make_shared(), "The mutation command string (the part of the query after ALTER TABLE [db.]table)."}, + { "create_time", std::make_shared(), "Date and time when the mutation command was submitted for execution."}, + { "block_numbers.partition_id", std::make_shared(std::make_shared()), "For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty."}, + { "block_numbers.number", std::make_shared(std::make_shared()), + "For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. " + "Only parts that contain blocks with numbers less than this number will be mutated in the partition. " + "In non-replicated tables, block numbers in all partitions form a single sequence. " + "This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation." + }, + { "parts_to_do_names", std::make_shared(std::make_shared()), "An array of names of data parts that need to be mutated for the mutation to complete."}, + { "parts_to_do", std::make_shared(), "The number of data parts that need to be mutated for the mutation to complete."}, + { "is_done", std::make_shared(), + "The flag whether the mutation is done or not. Possible values: " + "1 if the mutation is completed, " + "0 if the mutation is still in process. 
" + }, { "is_killed", std::make_shared() }, - { "latest_failed_part", std::make_shared() }, - { "latest_fail_time", std::make_shared() }, - { "latest_fail_reason", std::make_shared() }, + { "latest_failed_part", std::make_shared(), "The name of the most recent part that could not be mutated."}, + { "latest_fail_time", std::make_shared(), "The date and time of the most recent part mutation failure."}, + { "latest_fail_reason", std::make_shared(), "The exception message that caused the most recent part mutation failure."}, }; } diff --git a/src/Storages/System/StorageSystemMutations.h b/src/Storages/System/StorageSystemMutations.h index 0dac17eeb10..2db6e0c17f1 100644 --- a/src/Storages/System/StorageSystemMutations.h +++ b/src/Storages/System/StorageSystemMutations.h @@ -16,7 +16,7 @@ class StorageSystemMutations final : public IStorageSystemOneBlock + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemMySQLBinlogs::getNamesAndTypes() +{ + return { + {"binlog_client_name", std::make_shared()}, + {"name", std::make_shared()}, + {"mysql_binlog_name", std::make_shared()}, + {"mysql_binlog_pos", std::make_shared()}, + {"mysql_binlog_timestamp", std::make_shared()}, + {"mysql_binlog_executed_gtid_set", std::make_shared()}, + {"dispatcher_name", std::make_shared()}, + {"dispatcher_mysql_binlog_name", std::make_shared()}, + {"dispatcher_mysql_binlog_pos", std::make_shared()}, + {"dispatcher_mysql_binlog_timestamp", std::make_shared()}, + {"dispatcher_mysql_binlog_executed_gtid_set", std::make_shared()}, + {"size", std::make_shared()}, + {"bytes", std::make_shared()}, + {"max_bytes", std::make_shared()}, + {"max_waiting_ms", std::make_shared()}, + {"dispatcher_events_read_per_sec", std::make_shared()}, + {"dispatcher_bytes_read_per_sec", std::make_shared()}, + {"dispatcher_events_flush_per_sec", std::make_shared()}, + {"dispatcher_bytes_flush_per_sec", std::make_shared()}, + }; +} + +StorageSystemMySQLBinlogs::StorageSystemMySQLBinlogs(const StorageID & storage_id_) + : IStorage(storage_id_) +{ + StorageInMemoryMetadata storage_metadata; + ColumnsDescription columns(getNamesAndTypes()); + storage_metadata.setColumns(columns); + setInMemoryMetadata(storage_metadata); +} + +class MetadataSource : public ISource +{ +public: + using DispatcherMetadata = MySQLReplication::BinlogEventsDispatcher::DispatcherMetadata; + using BinlogMetadata = MySQLReplication::BinlogEventsDispatcher::BinlogMetadata; + + MetadataSource(Block block_header_, const std::vector & clients_) + : ISource(block_header_) + , block_to_fill(std::move(block_header_)) + , clients(clients_) + {} + + String getName() const override { return "MySQLBinlogClient"; } + +protected: + Chunk generate() override + { + if (clients.empty()) + return {}; + + Columns columns; + columns.reserve(block_to_fill.columns()); + + size_t total_size = 0; + auto create_column = [&](auto && column, const std::function & field) + { + size_t size = 0; + for (const auto & client : clients) + { + for (const auto & d : client.dispatchers) + { + for (const auto & b : d.binlogs) + { + column->insert(field(client.binlog_client_name, d, b)); + ++size; + } + } + } + if (!total_size) + total_size = size; + return std::forward(column); + }; + + for (const auto & elem : block_to_fill) + { + if (elem.name == "binlog_client_name") + columns.emplace_back(create_column(ColumnString::create(), [](auto n, auto, auto) { return Field(n); })); + else if (elem.name == "name") + 
columns.emplace_back(create_column(ColumnString::create(), [](auto, auto, auto b) { return Field(b.name); })); + else if (elem.name == "mysql_binlog_name") + columns.emplace_back(create_column(ColumnString::create(), [](auto, auto, auto b) { return Field(b.position_read.binlog_name); })); + else if (elem.name == "mysql_binlog_pos") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto, auto b) { return Field(b.position_read.binlog_pos); })); + else if (elem.name == "mysql_binlog_timestamp") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto, auto b) { return Field(b.position_read.timestamp); })); + else if (elem.name == "mysql_binlog_executed_gtid_set") + columns.emplace_back(create_column(ColumnString::create(), [](auto, auto, auto b) { return Field(b.position_read.gtid_sets.toString()); })); + else if (elem.name == "dispatcher_name") + columns.emplace_back(create_column(ColumnString::create(), [](auto, auto d, auto) { return Field(d.name); })); + else if (elem.name == "dispatcher_mysql_binlog_name") + columns.emplace_back(create_column(ColumnString::create(), [](auto, auto d, auto) { return Field(d.position.binlog_name); })); + else if (elem.name == "dispatcher_mysql_binlog_pos") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto d, auto) { return Field(d.position.binlog_pos); })); + else if (elem.name == "dispatcher_mysql_binlog_timestamp") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto d, auto) { return Field(d.position.timestamp); })); + else if (elem.name == "dispatcher_mysql_binlog_executed_gtid_set") + columns.emplace_back(create_column(ColumnString::create(), [](auto, auto d, auto) { return Field(d.position.gtid_sets.toString()); })); + else if (elem.name == "size") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto, auto b) { return Field(b.size); })); + else if (elem.name == "bytes") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto, auto b) { return Field(b.bytes); })); + else if (elem.name == "max_bytes") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto, auto b) { return Field(b.max_bytes); })); + else if (elem.name == "max_waiting_ms") + columns.emplace_back(create_column(ColumnUInt64::create(), [](auto, auto, auto b) { return Field(b.max_waiting_ms); })); + else if (elem.name == "dispatcher_events_read_per_sec") + columns.emplace_back(create_column(ColumnFloat32::create(), [](auto, auto d, auto) { return Field(d.events_read_per_sec); })); + else if (elem.name == "dispatcher_bytes_read_per_sec") + columns.emplace_back(create_column(ColumnFloat32::create(), [](auto, auto d, auto) { return Field(d.bytes_read_per_sec); })); + else if (elem.name == "dispatcher_events_flush_per_sec") + columns.emplace_back(create_column(ColumnFloat32::create(), [](auto, auto d, auto) { return Field(d.events_flush_per_sec); })); + else if (elem.name == "dispatcher_bytes_flush_per_sec") + columns.emplace_back(create_column(ColumnFloat32::create(), [](auto, auto d, auto) { return Field(d.bytes_flush_per_sec); })); + } + + clients.clear(); + return {std::move(columns), total_size}; + } + +private: + Block block_to_fill; + std::vector clients; +}; + +Pipe StorageSystemMySQLBinlogs::read( + const Names & column_names_, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /* query_info_ */, + ContextPtr /*context_ */, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) 
+{ + storage_snapshot->check(column_names_); + const ColumnsDescription & our_columns = storage_snapshot->getDescriptionForColumns(column_names_); + Block block_header; + for (const auto & name : column_names_) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + return Pipe{std::make_shared(block_header, MySQLReplication::BinlogClientFactory::instance().getMetadata())}; +} + +} diff --git a/src/Storages/System/StorageSystemMySQLBinlogs.h b/src/Storages/System/StorageSystemMySQLBinlogs.h new file mode 100644 index 00000000000..a627137f495 --- /dev/null +++ b/src/Storages/System/StorageSystemMySQLBinlogs.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace DB +{ + +class StorageSystemMySQLBinlogs final : public IStorage +{ +public: + explicit StorageSystemMySQLBinlogs(const StorageID & storage_id_); + + std::string getName() const override { return "MySQLBinlogs"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + bool isSystemStorage() const override { return true; } + + static NamesAndTypesList getNamesAndTypes(); +}; + +} diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 1d94b0afd1b..25401bb751b 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -15,11 +15,12 @@ namespace DB { -NamesAndTypesList StorageSystemNamedCollections::getNamesAndTypes() +ColumnsDescription StorageSystemNamedCollections::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"collection", std::make_shared(std::make_shared(), std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "Name of the collection."}, + {"collection", std::make_shared(std::make_shared(), std::make_shared()), "Collection internals."}, }; } diff --git a/src/Storages/System/StorageSystemNamedCollections.h b/src/Storages/System/StorageSystemNamedCollections.h index d20fa62d30b..596df99be83 100644 --- a/src/Storages/System/StorageSystemNamedCollections.h +++ b/src/Storages/System/StorageSystemNamedCollections.h @@ -12,7 +12,7 @@ public: std::string getName() const override { return "SystemNamedCollections"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp index c397392e9fb..1a2646d3295 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp @@ -16,9 +16,11 @@ namespace DB { -NamesAndTypesList StorageSystemPartMovesBetweenShards::getNamesAndTypes() +ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription() { - return { + /// TODO: Fill in all the comments + return ColumnsDescription + { /// Table properties. 
{ "database", std::make_shared() }, { "table", std::make_shared() }, diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.h b/src/Storages/System/StorageSystemPartMovesBetweenShards.h index 9fbc8e532a4..93a26bcd1b7 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.h +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.h @@ -14,7 +14,7 @@ class StorageSystemPartMovesBetweenShards final : public IStorageSystemOneBlock< public: std::string getName() const override { return "SystemShardMoves"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 57c455fcdc7..b6e4ee4161e 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -45,54 +45,54 @@ namespace DB StorageSystemParts::StorageSystemParts(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, - { - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"primary_key_size", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"secondary_indices_compressed_bytes", std::make_shared()}, - {"secondary_indices_uncompressed_bytes", std::make_shared()}, - {"secondary_indices_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + ColumnsDescription{ + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"uuid", std::make_shared(), "The UUID of data part."}, + {"part_type", std::make_shared(), "The data part storing format. Possible Values: Wide (a file per column) and Compact (a single file for all columns)."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's about to be deleted. Inactive data parts appear after merging and mutating operations."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192) (this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. 
All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"primary_key_size", std::make_shared(), "The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"secondary_indices_compressed_bytes", std::make_shared(), "Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"secondary_indices_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"secondary_indices_marks_bytes", std::make_shared(), "The size of the file with marks for secondary indices."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value of the date key in the data part."}, + {"max_date", std::make_shared(), "The maximum value of the date key in the data part."}, + {"min_time", std::make_shared(), "The minimum value of the date and time key in the data part."}, + {"max_time", std::make_shared(), "The maximum value of the date and time key in the data part."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, + {"is_frozen", std::make_shared(), "Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. 
"}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"hash_of_all_files", std::make_shared()}, - {"hash_of_uncompressed_files", std::make_shared()}, - {"uncompressed_hash_of_compressed_files", std::make_shared()}, + {"hash_of_all_files", std::make_shared(), "sipHash128 of compressed files."}, + {"hash_of_uncompressed_files", std::make_shared(), "sipHash128 of uncompressed files (files with marks, index file etc.)."}, + {"uncompressed_hash_of_compressed_files", std::make_shared(), "sipHash128 of data in the compressed files as if they were uncompressed."}, - {"delete_ttl_info_min", std::make_shared()}, - {"delete_ttl_info_max", std::make_shared()}, + {"delete_ttl_info_min", std::make_shared(), "The minimum value of the date and time key for TTL DELETE rule."}, + {"delete_ttl_info_max", std::make_shared(), "The maximum value of the date and time key for TTL DELETE rule."}, - {"move_ttl_info.expression", std::make_shared(std::make_shared())}, - {"move_ttl_info.min", std::make_shared(std::make_shared())}, - {"move_ttl_info.max", std::make_shared(std::make_shared())}, + {"move_ttl_info.expression", std::make_shared(std::make_shared()), "Array of expressions. Each expression defines a TTL MOVE rule."}, + {"move_ttl_info.min", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, + {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the maximum key value for a TTL MOVE rule."}, {"default_compression_codec", std::make_shared()}, @@ -119,7 +119,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"has_lightweight_delete", std::make_shared()}, - {"last_removal_attempt_time", std::make_shared()}, + {"last_removal_attempt_time", std::make_shared()}, {"removal_state", std::make_shared()}, } ) diff --git a/src/Storages/System/StorageSystemParts.h b/src/Storages/System/StorageSystemParts.h index c7a46cfda54..e0082e40e7d 100644 --- a/src/Storages/System/StorageSystemParts.h +++ b/src/Storages/System/StorageSystemParts.h @@ -11,7 +11,7 @@ class Context; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. 
*/ -class StorageSystemParts final : public StorageSystemPartsBase +class StorageSystemParts : public StorageSystemPartsBase { public: explicit StorageSystemParts(const StorageID & table_id_); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 8d2e2900722..48dab8c4777 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -22,11 +24,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) { bool has_state_column = false; @@ -83,7 +80,7 @@ StoragesInfo::getProjectionParts(MergeTreeData::DataPartStateVector & state, boo } StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) - : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()) + : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. /// This is kind of complicated, but we use WHERE to do less work. @@ -93,6 +90,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte MutableColumnPtr table_column_mut = ColumnString::create(); MutableColumnPtr engine_column_mut = ColumnString::create(); MutableColumnPtr active_column_mut = ColumnUInt8::create(); + MutableColumnPtr storage_uuid_column_mut = ColumnUUID::create(); const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); @@ -139,6 +137,14 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte continue; String engine_name = storage->getName(); + UUID storage_uuid = storage->getStorageID().uuid; + if (database->getEngineName() == "Ordinary") + { + SipHash hash; + hash.update(database_name); + hash.update(table_name); + storage_uuid = hash.get128(); + } #if USE_MYSQL if (auto * proxy = dynamic_cast(storage.get())) @@ -153,7 +159,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; - storages[std::make_pair(database_name, iterator->name())] = storage; + storages[storage_uuid] = storage; /// Add all combinations of flag 'active'. 
for (UInt64 active : {0, 1}) @@ -161,6 +167,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte table_column_mut->insert(table_name); engine_column_mut->insert(engine_name); active_column_mut->insert(active); + storage_uuid_column_mut->insert(storage_uuid); } offsets[i] += 2; @@ -178,6 +185,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared(), "table")); block_to_filter.insert(ColumnWithTypeAndName(std::move(engine_column_mut), std::make_shared(), "engine")); block_to_filter.insert(ColumnWithTypeAndName(std::move(active_column_mut), std::make_shared(), "active")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(storage_uuid_column_mut), std::make_shared(), "uuid")); if (rows) { @@ -189,57 +197,9 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte database_column = block_to_filter.getByName("database").column; table_column = block_to_filter.getByName("table").column; active_column = block_to_filter.getByName("active").column; - - next_row = 0; + storage_uuid_column = block_to_filter.getByName("uuid").column; } -StoragesInfo StoragesInfoStream::next() -{ - while (next_row < rows) - { - StoragesInfo info; - - info.database = (*database_column)[next_row].get(); - info.table = (*table_column)[next_row].get(); - - auto is_same_table = [&info, this] (size_t row) -> bool - { - return (*database_column)[row].get() == info.database && - (*table_column)[row].get() == info.table; - }; - - /// We may have two rows per table which differ in 'active' value. - /// If rows with 'active = 0' were not filtered out, this means we - /// must collect the inactive parts. Remember this fact in StoragesInfo. - for (; next_row < rows && is_same_table(next_row); ++next_row) - { - const auto active = (*active_column)[next_row].get(); - if (active == 0) - info.need_inactive_parts = true; - } - - info.storage = storages.at(std::make_pair(info.database, info.table)); - - /// For table not to be dropped and set of columns to remain constant. - info.table_lock = info.storage->tryLockForShare(query_id, settings.lock_acquire_timeout); - - if (info.table_lock == nullptr) - { - // Table was dropped while acquiring the lock, skipping table - continue; - } - - info.engine = info.storage->getName(); - - info.data = dynamic_cast(info.storage.get()); - if (!info.data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); - - return info; - } - - return {}; -} Pipe StorageSystemPartsBase::read( const Names & column_names, @@ -252,7 +212,7 @@ Pipe StorageSystemPartsBase::read( { bool has_state_column = hasStateColumn(column_names, storage_snapshot); - StoragesInfoStream stream(query_info, context); + auto stream = getStoragesInfoStream(query_info, context); /// Create the result. 
Block sample = storage_snapshot->metadata->getSampleBlock(); @@ -263,7 +223,7 @@ Pipe StorageSystemPartsBase::read( if (has_state_column) res_columns.push_back(ColumnString::create()); - while (StoragesInfo info = stream.next()) + while (StoragesInfo info = stream->next()) { processNextStorage(context, res_columns, columns_mask, info, has_state_column); } @@ -278,19 +238,17 @@ Pipe StorageSystemPartsBase::read( } -StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_) +StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns) : IStorage(table_id_) { - ColumnsDescription tmp_columns(std::move(columns_)); - auto add_alias = [&](const String & alias_name, const String & column_name) { - if (!tmp_columns.has(column_name)) + if (!columns.has(column_name)) return; - ColumnDescription column(alias_name, tmp_columns.get(column_name).type); + ColumnDescription column(alias_name, columns.get(column_name).type); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared(column_name); - tmp_columns.add(column); + columns.add(column); }; /// Add aliases for old column names for backwards compatibility. @@ -299,7 +257,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Name add_alias("part_name", "name"); StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(tmp_columns); + storage_metadata.setColumns(columns); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index c3d2e64b303..0a45d0f9dfe 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + class Context; struct StoragesInfo @@ -29,13 +34,66 @@ struct StoragesInfo }; /** A helper class that enumerates the storages that match given query. */ -class StoragesInfoStream +class StoragesInfoStreamBase { public: - StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); - StoragesInfo next(); + StoragesInfoStreamBase(ContextPtr context) + : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()), next_row(0), rows(0) + {} -private: + StoragesInfoStreamBase(const StoragesInfoStreamBase&) = default; + virtual ~StoragesInfoStreamBase() = default; + + StoragesInfo next() + { + while (next_row < rows) + { + StoragesInfo info; + + info.database = (*database_column)[next_row].get(); + info.table = (*table_column)[next_row].get(); + UUID storage_uuid = (*storage_uuid_column)[next_row].get(); + + auto is_same_table = [&storage_uuid, this] (size_t row) -> bool + { + return (*storage_uuid_column)[row].get() == storage_uuid; + }; + + /// We may have two rows per table which differ in 'active' value. + /// If rows with 'active = 0' were not filtered out, this means we + /// must collect the inactive parts. Remember this fact in StoragesInfo. + for (; next_row < rows && is_same_table(next_row); ++next_row) + { + const auto active = (*active_column)[next_row].get(); + if (active == 0) + info.need_inactive_parts = true; + } + + info.storage = storages.at(storage_uuid); + + /// For table not to be dropped and set of columns to remain constant. 
+ if (!tryLockTable(info)) + continue; + + info.engine = info.storage->getName(); + + info.data = dynamic_cast(info.storage.get()); + if (!info.data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); + + return info; + } + + return {}; + } +protected: + virtual bool tryLockTable(StoragesInfo & info) + { + info.table_lock = info.storage->tryLockForShare(query_id, settings.lock_acquire_timeout); + // nullptr means table was dropped while acquiring the lock + return info.table_lock != nullptr; + } +protected: String query_id; Settings settings; @@ -43,14 +101,22 @@ private: ColumnPtr database_column; ColumnPtr table_column; ColumnPtr active_column; + ColumnPtr storage_uuid_column; size_t next_row; size_t rows; - using StoragesMap = std::map, StoragePtr>; + using StoragesMap = std::unordered_map; StoragesMap storages; }; + +class StoragesInfoStream : public StoragesInfoStreamBase +{ +public: + StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); +}; + /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. */ class StorageSystemPartsBase : public IStorage @@ -75,7 +141,12 @@ private: protected: const FormatSettings format_settings = {}; - StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_); + StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns); + + virtual std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) + { + return std::make_unique(query_info, context); + } virtual void processNextStorage(ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) = 0; diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index a41ab24a340..833a5e1ec16 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -21,7 +21,7 @@ namespace DB StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, - { + ColumnsDescription{ {"partition", std::make_shared()}, {"name", std::make_shared()}, {"uuid", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index ee412d0e648..f45f3c6ed01 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -64,15 +64,16 @@ const std::vector> & StorageSystemPrivileges::getAccess } -NamesAndTypesList StorageSystemPrivileges::getNamesAndTypes() +ColumnsDescription StorageSystemPrivileges::getColumnsDescription() { - NamesAndTypesList names_and_types{ + /// TODO: Fill in all the comments. 
+ return ColumnsDescription + { {"privilege", std::make_shared(getAccessTypeEnumValues())}, {"aliases", std::make_shared(std::make_shared())}, {"level", std::make_shared(std::make_shared(getLevelEnumValues()))}, {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues()))}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemPrivileges.h b/src/Storages/System/StorageSystemPrivileges.h index a30fefad137..4441cf78d5c 100644 --- a/src/Storages/System/StorageSystemPrivileges.h +++ b/src/Storages/System/StorageSystemPrivileges.h @@ -12,7 +12,7 @@ class StorageSystemPrivileges final : public IStorageSystemOneBlock> & getAccessTypeEnumValues(); protected: diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index e053f2e63ff..6702e68b81e 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -17,14 +17,15 @@ namespace DB { -NamesAndTypesList StorageSystemProcesses::getNamesAndTypes() +ColumnsDescription StorageSystemProcesses::getColumnsDescription() { - return { + auto description = ColumnsDescription + { {"is_initial_query", std::make_shared()}, - {"user", std::make_shared()}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, + {"user", std::make_shared(), "The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the default user. The field contains the username for a specific query, not for a query that this query initiated."}, + {"query_id", std::make_shared(), "Query ID, if defined."}, + {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, {"port", std::make_shared()}, {"initial_user", std::make_shared()}, @@ -50,17 +51,17 @@ NamesAndTypesList StorageSystemProcesses::getNamesAndTypes() {"quota_key", std::make_shared()}, {"distributed_depth", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"is_cancelled", std::make_shared()}, - {"is_all_data_sent", std::make_shared()}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"total_rows_approx", std::make_shared()}, + {"elapsed", std::make_shared(), "The time in seconds since request execution started."}, + {"is_cancelled", std::make_shared(), "Was query cancelled."}, + {"is_all_data_sent", std::make_shared(), "Was all data sent to the client (in other words query had been finished on the server)."}, + {"read_rows", std::make_shared(), "The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, + {"read_bytes", std::make_shared(), "The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, + {"total_rows_approx", std::make_shared(), "The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known."}, {"written_rows", std::make_shared()}, {"written_bytes", std::make_shared()}, - {"memory_usage", std::make_shared()}, + {"memory_usage", std::make_shared(), "Amount of RAM the query uses. 
It might not include some types of dedicated memory"}, {"peak_memory_usage", std::make_shared()}, - {"query", std::make_shared()}, + {"query", std::make_shared(), "The query text. For INSERT, it does not include the data to insert."}, {"query_kind", std::make_shared()}, {"thread_ids", std::make_shared(std::make_shared())}, @@ -69,17 +70,15 @@ NamesAndTypesList StorageSystemProcesses::getNamesAndTypes() {"current_database", std::make_shared()}, }; -} -NamesAndAliases StorageSystemProcesses::getNamesAndAliases() -{ - return - { + description.setAliases({ {"ProfileEvents.Names", {std::make_shared(std::make_shared())}, "mapKeys(ProfileEvents)"}, {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"}, {"Settings.Names", {std::make_shared(std::make_shared())}, "mapKeys(Settings)" }, {"Settings.Values", {std::make_shared(std::make_shared())}, "mapValues(Settings)"} - }; + }); + + return description; } void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemProcesses.h b/src/Storages/System/StorageSystemProcesses.h index 8e61a3a402c..3017f9fd367 100644 --- a/src/Storages/System/StorageSystemProcesses.h +++ b/src/Storages/System/StorageSystemProcesses.h @@ -16,9 +16,7 @@ class StorageSystemProcesses final : public IStorageSystemOneBlock()}, {"name", std::make_shared()}, {"part_type", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index 06becc6d91c..2ff25f86366 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -18,7 +18,7 @@ namespace DB StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, - { + ColumnsDescription{ {"partition", std::make_shared()}, {"name", std::make_shared()}, {"part_type", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 8538820cf41..03111755904 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -9,16 +9,17 @@ namespace DB { -NamesAndTypesList StorageSystemQueryCache::getNamesAndTypes() +ColumnsDescription StorageSystemQueryCache::getColumnsDescription() { - return { - {"query", std::make_shared()}, - {"result_size", std::make_shared()}, - {"stale", std::make_shared()}, - {"shared", std::make_shared()}, - {"compressed", std::make_shared()}, - {"expires_at", std::make_shared()}, - {"key_hash", std::make_shared()} + return ColumnsDescription + { + {"query", std::make_shared(), "Query string."}, + {"result_size", std::make_shared(), "Size of the query cache entry."}, + {"stale", std::make_shared(), "If the query cache entry is stale."}, + {"shared", std::make_shared(), "If the query cache entry is shared between multiple users."}, + {"compressed", std::make_shared(), "If the query cache entry is compressed."}, + {"expires_at", std::make_shared(), "When the query cache entry becomes stale."}, + {"key_hash", std::make_shared(), "A hash of the query string, used as a key to find query cache entries."} }; } @@ -37,11 +38,15 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr std::vector content = query_cache->dump(); const String & user_name = context->getUserName(); + 
std::optional user_id = context->getUserID(); + std::vector current_user_roles = context->getCurrentRoles(); for (const auto & [key, query_result] : content) { /// Showing other user's queries is considered a security risk - if (!key.is_shared && key.user_name != user_name) + const bool is_same_user_id = ((!key.user_id.has_value() && !user_id.has_value()) || (key.user_id.has_value() && user_id.has_value() && *key.user_id == *user_id)); + const bool is_same_current_user_roles = (key.current_user_roles == current_user_roles); + if (!key.is_shared && (!is_same_user_id || !is_same_current_user_roles)) continue; res_columns[0]->insert(key.query_string); /// approximates the original query string diff --git a/src/Storages/System/StorageSystemQueryCache.h b/src/Storages/System/StorageSystemQueryCache.h index 5ff5f0a0454..08ad30afb81 100644 --- a/src/Storages/System/StorageSystemQueryCache.h +++ b/src/Storages/System/StorageSystemQueryCache.h @@ -12,7 +12,7 @@ public: std::string getName() const override { return "SystemQueryCache"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index 6cc269130a0..f125a990a88 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -40,12 +40,23 @@ namespace } -NamesAndTypesList StorageSystemQuotaLimits::getNamesAndTypes() +ColumnsDescription StorageSystemQuotaLimits::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"quota_name", std::make_shared()}, - {"duration", std::make_shared()}, - {"is_randomized_interval", std::make_shared()}, + ColumnsDescription result + { + {"quota_name", std::make_shared(), "Quota name."}, + {"duration", std::make_shared(), "Length of the time interval for calculating resource consumption, in seconds."}, + {"is_randomized_interval", std::make_shared(), + "Boolean value. It shows whether the interval is randomized. " + "Interval always starts at the same time if it is not randomized. " + "For example, an interval of 1 minute always starts at an integer number of minutes " + "(i.e. it can start at 11:20:00, but it never starts at 11:20:01), " + "an interval of one day always starts at midnight UTC. " + "If interval is randomized, the very first interval starts at random time, " + "and subsequent intervals starts one by one. Values: " + "0 — Interval is not randomized, " + "1 — Interval is randomized." 
+ }, }; for (auto quota_type : collections::range(QuotaType::MAX)) @@ -57,10 +68,10 @@ NamesAndTypesList StorageSystemQuotaLimits::getNamesAndTypes() data_type = std::make_shared(); else data_type = std::make_shared(); - names_and_types.push_back({column_name, std::make_shared(data_type)}); + result.add({column_name, std::make_shared(data_type)}); } - return names_and_types; + return result; } diff --git a/src/Storages/System/StorageSystemQuotaLimits.h b/src/Storages/System/StorageSystemQuotaLimits.h index 927e45492e1..acc977d0df7 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.h +++ b/src/Storages/System/StorageSystemQuotaLimits.h @@ -12,7 +12,7 @@ class StorageSystemQuotaLimits final : public IStorageSystemOneBlock()}, - {"quota_key", std::make_shared()} + ColumnsDescription description + { + {"quota_name", std::make_shared(), "Quota name."}, + {"quota_key", std::make_shared(), "Key value."} }; if (add_column_is_current) - names_and_types.push_back({"is_current", std::make_shared()}); + description.add({"is_current", std::make_shared(), "Quota usage for current user."}); - names_and_types.push_back({"start_time", std::make_shared(std::make_shared())}); - names_and_types.push_back({"end_time", std::make_shared(std::make_shared())}); - names_and_types.push_back({"duration", std::make_shared(std::make_shared())}); + description.add({ + "start_time", + std::make_shared(std::make_shared()), + "Start time for calculating resource consumption." + }); + description.add({ + "end_time", + std::make_shared(std::make_shared()), + "End time for calculating resource consumption." + }); + description.add({ + "duration", + std::make_shared(std::make_shared()), + "Length of the time interval for calculating resource consumption, in seconds." + }); for (auto quota_type : collections::range(QuotaType::MAX)) { @@ -68,11 +81,11 @@ NamesAndTypesList StorageSystemQuotaUsage::getNamesAndTypesImpl(bool add_column_ data_type = std::make_shared(); else data_type = std::make_shared(); - names_and_types.push_back({column_name, std::make_shared(data_type)}); - names_and_types.push_back({String("max_") + column_name, std::make_shared(data_type)}); + description.add({column_name, std::make_shared(data_type)}); + description.add({String("max_") + column_name, std::make_shared(data_type)}); } - return names_and_types; + return description; } diff --git a/src/Storages/System/StorageSystemQuotaUsage.h b/src/Storages/System/StorageSystemQuotaUsage.h index 47cf8f5dfc0..a3109e9ca31 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.h +++ b/src/Storages/System/StorageSystemQuotaUsage.h @@ -16,9 +16,9 @@ class StorageSystemQuotaUsage final : public IStorageSystemOneBlock & quotas_usage); protected: diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index 439883e038a..ee302f2f163 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -35,19 +35,34 @@ namespace } -NamesAndTypesList StorageSystemQuotas::getNamesAndTypes() +ColumnsDescription StorageSystemQuotas::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"name", std::make_shared()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, - {"keys", std::make_shared(std::make_shared(getKeyTypeEnumValues()))}, - {"durations", std::make_shared(std::make_shared())}, - {"apply_to_all", std::make_shared()}, - {"apply_to_list", std::make_shared(std::make_shared())}, - {"apply_to_except", std::make_shared(std::make_shared())} + return 
ColumnsDescription + { + {"name", std::make_shared(), "Quota name."}, + {"id", std::make_shared(), "Quota ID."}, + {"storage", std::make_shared(), "Storage of quotas. Possible value: “users.xml” if a quota configured in the users.xml file, “disk” if a quota configured by an SQL-query."}, + {"keys", std::make_shared(std::make_shared(getKeyTypeEnumValues())), + "Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values: " + "[] — All users share the same quota, " + "['user_name'] — Connections with the same user name share the same quota, " + "['ip_address'] — Connections from the same IP share the same quota. " + "['client_key'] — Connections with the same key share the same quota. A key must be explicitly provided by a client. " + "When using clickhouse-client, pass a key value in the --quota_key parameter, " + "or use the quota_key parameter in the client configuration file. " + "When using HTTP interface, use the X-ClickHouse-Quota header, " + "['user_name', 'client_key'] — Connections with the same client_key share the same quota. If a key isn't provided by a client, the quota is tracked for `user_name`, " + "['client_key', 'ip_address'] — Connections with the same client_key share the same quota. If a key isn’t provided by a client, the quota is tracked for ip_address." + }, + {"durations", std::make_shared(std::make_shared()), "Time interval lengths in seconds."}, + {"apply_to_all", std::make_shared(), + "Logical value. It shows which users the quota is applied to. Values: " + "0 — The quota applies to users specify in the apply_to_list. " + "1 — The quota applies to all users except those listed in apply_to_except." + }, + {"apply_to_list", std::make_shared(std::make_shared()), "List of user names/roles that the quota should be applied to."}, + {"apply_to_except", std::make_shared(std::make_shared()), "List of user names/roles that the quota should not apply to."} }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemQuotas.h b/src/Storages/System/StorageSystemQuotas.h index 28c873aa734..cafd8b921fa 100644 --- a/src/Storages/System/StorageSystemQuotas.h +++ b/src/Storages/System/StorageSystemQuotas.h @@ -13,7 +13,7 @@ class StorageSystemQuotas final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemQuotasUsage.cpp b/src/Storages/System/StorageSystemQuotasUsage.cpp index a3c97247111..ed22f73dd50 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.cpp +++ b/src/Storages/System/StorageSystemQuotasUsage.cpp @@ -8,9 +8,9 @@ namespace DB { -NamesAndTypesList StorageSystemQuotasUsage::getNamesAndTypes() +ColumnsDescription StorageSystemQuotasUsage::getColumnsDescription() { - return StorageSystemQuotaUsage::getNamesAndTypesImpl(/* add_column_is_current = */ true); + return StorageSystemQuotaUsage::getColumnsDescriptionImpl(/* add_column_is_current = */ true); } void StorageSystemQuotasUsage::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemQuotasUsage.h b/src/Storages/System/StorageSystemQuotasUsage.h index d60258bbc3f..ecdc62865d1 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.h +++ b/src/Storages/System/StorageSystemQuotasUsage.h @@ -14,7 +14,7 @@ class StorageSystemQuotasUsage final : public 
IStorageSystemOneBlock()}, - {"table", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"progress", std::make_shared()}, - {"result_part_name", std::make_shared()}, - {"result_part_path", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"total_size_bytes_compressed", std::make_shared()}, - {"bytes_read_compressed", std::make_shared()}, - {"source_replica_path", std::make_shared()}, - {"source_replica_hostname", std::make_shared()}, - {"source_replica_port", std::make_shared()}, - {"interserver_scheme", std::make_shared()}, - {"URI", std::make_shared()}, - {"to_detached", std::make_shared()}, - {"thread_id", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"elapsed", std::make_shared(), "The time elapsed (in seconds) since showing currently running background fetches started."}, + {"progress", std::make_shared(), "The percentage of completed work from 0 to 1."}, + {"result_part_name", std::make_shared(), + "The name of the part that will be formed as the result of showing currently running background fetches."}, + {"result_part_path", std::make_shared(), + "Absolute path to the part that will be formed as the result of showing currently running background fetches."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"total_size_bytes_compressed", std::make_shared(), "The total size (in bytes) of the compressed data in the result part."}, + {"bytes_read_compressed", std::make_shared(), "The number of compressed bytes read from the result part."}, + {"source_replica_path", std::make_shared(), "Absolute path to the source replica."}, + {"source_replica_hostname", std::make_shared(), "Hostname of the source replica."}, + {"source_replica_port", std::make_shared(), "Port number of the source replica."}, + {"interserver_scheme", std::make_shared(), "Name of the interserver scheme."}, + {"URI", std::make_shared(), "Uniform resource identifier."}, + {"to_detached", std::make_shared(), + "The flag indicates whether the currently running background fetch is being performed using the TO DETACHED expression."}, + {"thread_id", std::make_shared(), "Thread identifier."}, }; } diff --git a/src/Storages/System/StorageSystemReplicatedFetches.h b/src/Storages/System/StorageSystemReplicatedFetches.h index d496741b42a..a176912cac0 100644 --- a/src/Storages/System/StorageSystemReplicatedFetches.h +++ b/src/Storages/System/StorageSystemReplicatedFetches.h @@ -15,7 +15,7 @@ class StorageSystemReplicatedFetches final : public IStorageSystemOneBlock() }, - { "table", std::make_shared() }, - { "replica_name", std::make_shared() }, + { "database", std::make_shared(), "Name of the database."}, + { "table", std::make_shared(), "Name of the table."}, + { "replica_name", std::make_shared(), + "Replica name in ClickHouse Keeper. Different replicas of the same table have different names."}, /// Constant element properties. 
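/// A minimal, hypothetical sketch (names are illustrative, not from this patch) of the conversion pattern used throughout these hunks:
/// per-column documentation strings become the third constructor argument, dynamically generated columns are appended with add(),
/// and the former getNamesAndAliases() entries move into setAliases() on the same ColumnsDescription before it is returned.
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Storages/ColumnsDescription.h>

namespace DB
{

ColumnsDescription getExampleColumnsDescription()
{
    auto description = ColumnsDescription
    {
        {"name", std::make_shared<DataTypeString>(), "Object name."},
        {"value", std::make_shared<DataTypeUInt64>(), "Object value."},
    };

    /// Columns may still be added without a documentation comment.
    description.add({"extra", std::make_shared<DataTypeFloat64>()});

    /// Aliases keep the {name, {type}, expression} form of the old NamesAndAliases list.
    description.setAliases({
        {"name_length", {std::make_shared<DataTypeUInt64>()}, "length(name)"},
    });

    return description;
}

}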
- { "position", std::make_shared() }, - { "node_name", std::make_shared() }, - { "type", std::make_shared() }, - { "create_time", std::make_shared() }, - { "required_quorum", std::make_shared() }, - { "source_replica", std::make_shared() }, - { "new_part_name", std::make_shared() }, - { "parts_to_merge", std::make_shared(std::make_shared()) }, - { "is_detach", std::make_shared() }, + { "position", std::make_shared(), "Position of the task in the queue."}, + { "node_name", std::make_shared(), "Node name in ClickHouse Keeper."}, + { "type", std::make_shared(), + "Type of the task in the queue, one of: " + "• GET_PART — Get the part from another replica, " + "• ATTACH_PART — Attach the part, possibly from our own replica (if found in the detached folder). " + "You may think of it as a GET_PART with some optimizations as they're nearly identical, " + "• MERGE_PARTS — Merge the parts, " + "• DROP_RANGE — Delete the parts in the specified partition in the specified number range. " + "• CLEAR_COLUMN — NOTE: Deprecated. Drop specific column from specified partition. " + "• CLEAR_INDEX — NOTE: Deprecated. Drop specific index from specified partition. " + "• REPLACE_RANGE — Drop a certain range of parts and replace them with new ones. " + "• MUTATE_PART — Apply one or several mutations to the part. " + "• ALTER_METADATA — Apply alter modification according to global /metadata and /columns paths." + }, + { "create_time", std::make_shared(), "Date and time when the task was submitted for execution."}, + { "required_quorum", std::make_shared(), "The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the GET_PARTS task."}, + { "source_replica", std::make_shared(), "Name of the source replica."}, + { "new_part_name", std::make_shared(), "Name of the new part."}, + { "parts_to_merge", std::make_shared(std::make_shared()), "Names of parts to merge or update."}, + { "is_detach", std::make_shared(), "The flag indicates whether the DETACH_PARTS task is in the queue."}, /// Processing status of item. - { "is_currently_executing", std::make_shared() }, - { "num_tries", std::make_shared() }, - { "last_exception", std::make_shared() }, - { "last_exception_time", std::make_shared() }, - { "last_attempt_time", std::make_shared() }, - { "num_postponed", std::make_shared() }, - { "postpone_reason", std::make_shared() }, - { "last_postpone_time", std::make_shared() }, - { "merge_type", std::make_shared() }, + { "is_currently_executing", std::make_shared(), "The flag indicates whether a specific task is being performed right now."}, + { "num_tries", std::make_shared(), "The number of failed attempts to complete the task."}, + { "last_exception", std::make_shared(), "Text message about the last error that occurred (if any)."}, + { "last_exception_time", std::make_shared(), "Date and time when the last error occurred."}, + { "last_attempt_time", std::make_shared(), "Date and time when the task was last attempted."}, + { "num_postponed", std::make_shared(), "The number of postponed tasks."}, + { "postpone_reason", std::make_shared(), "The reason why the task was postponed."}, + { "last_postpone_time", std::make_shared(), "Date and time when the task was last postponed."}, + { "merge_type", std::make_shared(), "Type of the current merge. 
Empty if it's a mutation."}, }; } diff --git a/src/Storages/System/StorageSystemReplicationQueue.h b/src/Storages/System/StorageSystemReplicationQueue.h index 23376074d13..003e4eeb927 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.h +++ b/src/Storages/System/StorageSystemReplicationQueue.h @@ -16,7 +16,7 @@ class StorageSystemReplicationQueue final : public IStorageSystemOneBlock(std::make_shared())}, - {"role_name", std::make_shared(std::make_shared())}, - {"granted_role_name", std::make_shared()}, - {"granted_role_id", std::make_shared()}, - {"granted_role_is_default", std::make_shared()}, - {"with_admin_option", std::make_shared()}, + return ColumnsDescription + { + {"user_name", std::make_shared(std::make_shared()), "User name."}, + {"role_name", std::make_shared(std::make_shared()), "Role name."}, + {"granted_role_name", std::make_shared(), + "Name of role granted to the `role_name` role. To grant one role to another one use `GRANT role1 TO role2`."}, + {"granted_role_id", std::make_shared(), "The ID of the role."}, + {"granted_role_is_default", std::make_shared(), + "Flag that shows whether `granted_role` is a default role. Possible values: " + "• 1 — `granted_role` is a default role, " + "• 0 — `granted_role` is not a default role." + }, + {"with_admin_option", std::make_shared(), + "Flag that shows whether `granted_role` is a role with `ADMIN OPTION` privilege. Possible values: " + "• 1 — The role has `ADMIN OPTION` privilege." + "• 0 — The role without `ADMIN OPTION` privilege." + }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemRoleGrants.h b/src/Storages/System/StorageSystemRoleGrants.h index 16b27f7a608..969f82f85d5 100644 --- a/src/Storages/System/StorageSystemRoleGrants.h +++ b/src/Storages/System/StorageSystemRoleGrants.h @@ -12,7 +12,7 @@ class StorageSystemRoleGrants final : public IStorageSystemOneBlock()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Role name."}, + {"id", std::make_shared(), "Role ID."}, + {"storage", std::make_shared(), "Path to the storage of roles. Configured in the `access_control_path` parameter."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemRoles.h b/src/Storages/System/StorageSystemRoles.h index d9de9db5c65..2b4ae93a932 100644 --- a/src/Storages/System/StorageSystemRoles.h +++ b/src/Storages/System/StorageSystemRoles.h @@ -12,7 +12,7 @@ class StorageSystemRoles final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index c0bc38edc21..5a959cdf9af 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -21,33 +21,43 @@ namespace DB { -NamesAndTypesList StorageSystemRowPolicies::getNamesAndTypes() +ColumnsDescription StorageSystemRowPolicies::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"name", std::make_shared()}, - {"short_name", std::make_shared()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, + ColumnsDescription description + { + {"name", std::make_shared(), "Name of a row policy."}, + {"short_name", std::make_shared(), + "Short name of a row policy. 
Names of row policies are compound, for example: myfilter ON mydb.mytable. " + "Here 'myfilter ON mydb.mytable' is the name of the row policy, 'myfilter' is its short name." + }, + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Table name. Empty if the policy is for a database."}, + {"id", std::make_shared(), "Row policy ID."}, + {"storage", std::make_shared(), "Name of the directory where the row policy is stored."}, }; for (auto filter_type : collections::range(RowPolicyFilterType::MAX)) { const String & column_name = RowPolicyFilterTypeInfo::get(filter_type).name; - names_and_types.push_back({column_name, std::make_shared(std::make_shared())}); + description.add({column_name, std::make_shared(std::make_shared())}); } - NamesAndTypesList extra_names_and_types{ - {"is_restrictive", std::make_shared()}, - {"apply_to_all", std::make_shared()}, - {"apply_to_list", std::make_shared(std::make_shared())}, - {"apply_to_except", std::make_shared(std::make_shared())} - }; + description.add({"is_restrictive", std::make_shared(), + "Shows whether the row policy restricts access to rows. Value: " + "• 0 — The row policy is defined with `AS PERMISSIVE` clause, " + "• 1 — The row policy is defined with `AS RESTRICTIVE` clause." + }); + description.add({"apply_to_all", std::make_shared(), + "Shows whether the row policy is set for all roles and/or users." + }); + description.add({"apply_to_list", std::make_shared(std::make_shared()), + "List of the roles and/or users to which the row policy is applied." + }); + description.add({"apply_to_except", std::make_shared(std::make_shared()), + "The row policy applies to all roles and/or users except the listed ones." + }); - insertAtEnd(names_and_types, extra_names_and_types); - - return names_and_types; + return description; } diff --git a/src/Storages/System/StorageSystemRowPolicies.h b/src/Storages/System/StorageSystemRowPolicies.h index 9f94f7df65b..f8aa5618126 100644 --- a/src/Storages/System/StorageSystemRowPolicies.h +++ b/src/Storages/System/StorageSystemRowPolicies.h @@ -14,7 +14,7 @@ class StorageSystemRowPolicies final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 33b3dce4a83..3637734b225 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -20,9 +20,11 @@ namespace DB { -NamesAndTypesList StorageSystemS3Queue::getNamesAndTypes() +ColumnsDescription StorageSystemS3Queue::getColumnsDescription() { - return { + /// TODO: Fill in all the comments + return ColumnsDescription + { {"zookeeper_path", std::make_shared()}, {"file_name", std::make_shared()}, {"rows_processed", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemS3Queue.h b/src/Storages/System/StorageSystemS3Queue.h index 1bb4e3694d2..1dc5c521941 100644 --- a/src/Storages/System/StorageSystemS3Queue.h +++ b/src/Storages/System/StorageSystemS3Queue.h @@ -14,7 +14,7 @@ public: std::string getName() const override { return "SystemS3Queue"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemScheduler.cpp
b/src/Storages/System/StorageSystemScheduler.cpp index 0a6d8f91678..6a9502d0bd8 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -18,35 +18,65 @@ namespace DB { -NamesAndTypesList StorageSystemScheduler::getNamesAndTypes() +ColumnsDescription StorageSystemScheduler::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"resource", std::make_shared()}, - {"path", std::make_shared()}, - {"type", std::make_shared()}, - {"weight", std::make_shared()}, - {"priority", std::make_shared()}, - {"is_active", std::make_shared()}, - {"active_children", std::make_shared()}, - {"dequeued_requests", std::make_shared()}, - {"dequeued_cost", std::make_shared()}, - {"busy_periods", std::make_shared()}, - {"vruntime", std::make_shared(std::make_shared())}, - {"system_vruntime", std::make_shared(std::make_shared())}, - {"queue_length", std::make_shared(std::make_shared())}, - {"queue_cost", std::make_shared(std::make_shared())}, - {"budget", std::make_shared(std::make_shared())}, - {"is_satisfied", std::make_shared(std::make_shared())}, - {"inflight_requests", std::make_shared(std::make_shared())}, - {"inflight_cost", std::make_shared(std::make_shared())}, - {"max_requests", std::make_shared(std::make_shared())}, - {"max_cost", std::make_shared(std::make_shared())}, - {"max_speed", std::make_shared(std::make_shared())}, - {"max_burst", std::make_shared(std::make_shared())}, - {"throttling_us", std::make_shared(std::make_shared())}, - {"tokens", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"resource", std::make_shared(), "Resource name"}, + {"path", std::make_shared(), "Path to a scheduling node within this resource scheduling hierarchy"}, + {"type", std::make_shared(), "Type of a scheduling node."}, + {"weight", std::make_shared(), "Weight of a node, used by a parent node of `fair` type."}, + {"priority", std::make_shared(), "Priority of a node, used by a parent node of 'priority' type (Lower value means higher priority)."}, + {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, + {"active_children", std::make_shared(), "The number of children in active state."}, + {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, + {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, + {"vruntime", std::make_shared(std::make_shared()), + "For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, + {"system_vruntime", std::make_shared(std::make_shared()), + "For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. " + "Used during child activation as the new value of `vruntime`." + }, + {"queue_length", std::make_shared(std::make_shared()), + "For `fifo` nodes only. Current number of resource requests residing in the queue." + }, + {"queue_cost", std::make_shared(std::make_shared()), + "For fifo nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue." + }, + {"budget", std::make_shared(std::make_shared()), + "For fifo nodes only. The number of available 'cost units' for new resource requests. 
" + "Can appear in case of discrepancy of estimated and real costs of resource requests (e.g. after read/write failure)" + }, + {"is_satisfied", std::make_shared(std::make_shared()), + "For constraint nodes only (e.g. `inflight_limit`). Equals to `1` if all the constraint of this node are satisfied." + }, + {"inflight_requests", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. The number of resource requests dequeued from this node, that are currently in consumption state." + }, + {"inflight_cost", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. " + "The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state." + }, + {"max_requests", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. Upper limit for inflight_requests leading to constraint violation." + }, + {"max_cost", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. Upper limit for inflight_cost leading to constraint violation." + }, + {"max_speed", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Upper limit for bandwidth in tokens per second." + }, + {"max_burst", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Upper limit for tokens available in token-bucket throttler." + }, + {"throttling_us", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Total number of microseconds this node was in throttling state." + }, + {"tokens", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Number of tokens currently available in token-bucket throttler." + }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemScheduler.h b/src/Storages/System/StorageSystemScheduler.h index 31d14862209..1de72a85e9b 100644 --- a/src/Storages/System/StorageSystemScheduler.h +++ b/src/Storages/System/StorageSystemScheduler.h @@ -12,7 +12,7 @@ class StorageSystemScheduler final : public IStorageSystemOneBlock()}, - {"source", std::make_shared()}, - {"format", std::make_shared()}, - {"additional_format_info", std::make_shared()}, - {"registration_time", std::make_shared()}, - {"schema", std::make_shared(std::make_shared())}, - {"number_of_rows", std::make_shared(std::make_shared())}, - {"schema_inference_mode", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"storage", std::make_shared(), "Storage name: File, URL, S3 or HDFS."}, + {"source", std::make_shared(), "File source."}, + {"format", std::make_shared(), "Format name."}, + {"additional_format_info", std::make_shared(), + "Additional information required to identify the schema. For example, format specific settings." + }, + {"registration_time", std::make_shared(), "Timestamp when schema was added in cache."}, + {"schema", std::make_shared(std::make_shared()), "Cached schema."}, + {"number_of_rows", std::make_shared(std::make_shared()), "Number of rows in the file in given format. 
It's used for caching trivial count() from data files and for caching number of rows from the metadata during schema inference."}, + {"schema_inference_mode", std::make_shared(std::make_shared()), "Scheme inference mode."}, }; } diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.h b/src/Storages/System/StorageSystemSchemaInferenceCache.h index e3afc6e1e38..e6d306f8252 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.h +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.h @@ -11,7 +11,7 @@ class StorageSystemSchemaInferenceCache final : public IStorageSystemOneBlock( DataTypeEnum8::Values @@ -42,15 +42,16 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes() {"Yes", static_cast(ChangeableWithoutRestart::Yes)}, }); - return { - {"name", std::make_shared()}, - {"value", std::make_shared()}, - {"default", std::make_shared()}, - {"changed", std::make_shared()}, - {"description", std::make_shared()}, - {"type", std::make_shared()}, - {"changeable_without_restart", std::move(changeable_without_restart_type)}, - {"is_obsolete", std::make_shared()} + return ColumnsDescription + { + {"name", std::make_shared(), "Server setting name."}, + {"value", std::make_shared(), "Server setting value."}, + {"default", std::make_shared(), "Server setting default value."}, + {"changed", std::make_shared(), "Shows whether a setting was specified in config.xml"}, + {"description", std::make_shared(), "Short server setting description."}, + {"type", std::make_shared(), "Server setting value type."}, + {"changeable_without_restart", std::move(changeable_without_restart_type), "Shows whether a setting can be changed at runtime."}, + {"is_obsolete", std::make_shared(), "Shows whether a setting is obsolete."} }; } diff --git a/src/Storages/System/StorageSystemServerSettings.h b/src/Storages/System/StorageSystemServerSettings.h index b3aa8055853..276f21d674b 100644 --- a/src/Storages/System/StorageSystemServerSettings.h +++ b/src/Storages/System/StorageSystemServerSettings.h @@ -16,7 +16,7 @@ class StorageSystemServerSettings final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, - {"changed", std::make_shared()}, - {"description", std::make_shared()}, - {"min", std::make_shared(std::make_shared())}, - {"max", std::make_shared(std::make_shared())}, - {"readonly", std::make_shared()}, - {"type", std::make_shared()}, - {"default", std::make_shared()}, - {"alias_for", std::make_shared()}, - {"is_obsolete", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Setting name."}, + {"value", std::make_shared(), "Setting value."}, + {"changed", std::make_shared(), "Shows whether a setting is changed from its default value."}, + {"description", std::make_shared(), "Short setting description."}, + {"min", std::make_shared(std::make_shared()), + "Minimum value of the setting, if any is set via constraints. If the setting has no minimum value, contains NULL." + }, + {"max", std::make_shared(std::make_shared()), + "Maximum value of the setting, if any is set via constraints. If the setting has no maximum value, contains NULL." + }, + {"readonly", std::make_shared(), + "Shows whether the current user can change the setting: " + "0 — Current user can change the setting, " + "1 — Current user can't change the setting." 
+ }, + {"type", std::make_shared(), "The type of the value that can be assigned to this setting."}, + {"default", std::make_shared(), "Setting default value."}, + {"alias_for", std::make_shared(), "Flag that shows whether this name is an alias to another setting."}, + {"is_obsolete", std::make_shared(), "Shows whether a setting is obsolete."}, }; } diff --git a/src/Storages/System/StorageSystemSettings.h b/src/Storages/System/StorageSystemSettings.h index 201b79ac309..6749f9b20a4 100644 --- a/src/Storages/System/StorageSystemSettings.h +++ b/src/Storages/System/StorageSystemSettings.h @@ -16,7 +16,7 @@ class StorageSystemSettings final : public IStorageSystemOneBlock()}, {"changes", std::make_shared(std::make_shared( diff --git a/src/Storages/System/StorageSystemSettingsChanges.h b/src/Storages/System/StorageSystemSettingsChanges.h index 283487df51b..3a1a8ce23d1 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.h +++ b/src/Storages/System/StorageSystemSettingsChanges.h @@ -17,7 +17,7 @@ class StorageSystemSettingsChanges final : public IStorageSystemOneBlock> & getSettingConstraintWritabilityEnum return values; } -NamesAndTypesList StorageSystemSettingsProfileElements::getNamesAndTypes() +ColumnsDescription StorageSystemSettingsProfileElements::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"profile_name", std::make_shared(std::make_shared())}, - {"user_name", std::make_shared(std::make_shared())}, - {"role_name", std::make_shared(std::make_shared())}, - {"index", std::make_shared()}, - {"setting_name", std::make_shared(std::make_shared())}, - {"value", std::make_shared(std::make_shared())}, - {"min", std::make_shared(std::make_shared())}, - {"max", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"profile_name", std::make_shared(std::make_shared()), "Setting profile name."}, + {"user_name", std::make_shared(std::make_shared()), "User name."}, + {"role_name", std::make_shared(std::make_shared()), "Role name."}, + {"index", std::make_shared(), "Sequential number of the settings profile element."}, + {"setting_name", std::make_shared(std::make_shared()), "Setting name."}, + {"value", std::make_shared(std::make_shared()), "Setting value."}, + {"min", std::make_shared(std::make_shared()), "The minimum value of the setting. NULL if not set."}, + {"max", std::make_shared(std::make_shared()), "The maximum value of the setting. NULL if not set."}, {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues()))}, - {"inherit_profile", std::make_shared(std::make_shared())}, + {"inherit_profile", std::make_shared(std::make_shared()), + "A parent profile for this setting profile. NULL if not set. " + "Setting profile will inherit all the settings' values and constraints (min, max, readonly) from its parent profiles." 
+ }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.h b/src/Storages/System/StorageSystemSettingsProfileElements.h index 58c792c591c..1dedd616c82 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.h +++ b/src/Storages/System/StorageSystemSettingsProfileElements.h @@ -12,7 +12,7 @@ class StorageSystemSettingsProfileElements final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsProfileElements"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index 069c8762154..01041bee445 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -17,18 +17,22 @@ namespace DB { -NamesAndTypesList StorageSystemSettingsProfiles::getNamesAndTypes() +ColumnsDescription StorageSystemSettingsProfiles::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"name", std::make_shared()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, - {"num_elements", std::make_shared()}, - {"apply_to_all", std::make_shared()}, - {"apply_to_list", std::make_shared(std::make_shared())}, - {"apply_to_except", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "Setting profile name."}, + {"id", std::make_shared(), "Setting profile ID."}, + {"storage", std::make_shared(), "Path to the storage of setting profiles. Configured in the `access_control_path` parameter."}, + {"num_elements", std::make_shared(), "Number of elements for this profile in the `system.settings_profile_elements` table."}, + {"apply_to_all", std::make_shared(), "Shows whether the settings profile is set for all roles and/or users."}, + {"apply_to_list", std::make_shared(std::make_shared()), + "List of the roles and/or users to which the setting profile is applied." + }, + {"apply_to_except", std::make_shared(std::make_shared()), + "The setting profile applies to all roles and/or users except the listed ones."
+ }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemSettingsProfiles.h b/src/Storages/System/StorageSystemSettingsProfiles.h index 6edaa02a4c3..b0c8fc8658c 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.h +++ b/src/Storages/System/StorageSystemSettingsProfiles.h @@ -12,7 +12,7 @@ class StorageSystemSettingsProfiles final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemTableEngines.cpp b/src/Storages/System/StorageSystemTableEngines.cpp index e74b2670426..c0cf95423d9 100644 --- a/src/Storages/System/StorageSystemTableEngines.cpp +++ b/src/Storages/System/StorageSystemTableEngines.cpp @@ -6,18 +6,23 @@ namespace DB { -NamesAndTypesList StorageSystemTableEngines::getNamesAndTypes() +ColumnsDescription StorageSystemTableEngines::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"supports_settings", std::make_shared()}, - {"supports_skipping_indices", std::make_shared()}, - {"supports_projections", std::make_shared()}, - {"supports_sort_order", std::make_shared()}, - {"supports_ttl", std::make_shared()}, - {"supports_replication", std::make_shared()}, - {"supports_deduplication", std::make_shared()}, - {"supports_parallel_insert", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "The name of the table engine."}, + {"supports_settings", std::make_shared(), "Flag that indicates if table engine supports SETTINGS clause."}, + {"supports_skipping_indices", std::make_shared(), "Flag that indicates if table engine supports skipping indices."}, + {"supports_projections", std::make_shared(), "Flag that indicates if table engine supports projections."}, + {"supports_sort_order", std::make_shared(), + "Flag that indicates if table engine supports clauses PARTITION_BY, PRIMARY_KEY, ORDER_BY and SAMPLE_BY." + }, + {"supports_ttl", std::make_shared(), "Flag that indicates if table engine supports TTL."}, + {"supports_replication", std::make_shared(), "Flag that indicates if table engine supports data replication."}, + {"supports_deduplication", std::make_shared(), "Flag that indicates if table engine supports data deduplication."}, + {"supports_parallel_insert", std::make_shared(), + "Flag that indicates if table engine supports parallel insert (see max_insert_threads setting)."
+ }, }; } diff --git a/src/Storages/System/StorageSystemTableEngines.h b/src/Storages/System/StorageSystemTableEngines.h index f2849848ea7..258b9d210b1 100644 --- a/src/Storages/System/StorageSystemTableEngines.h +++ b/src/Storages/System/StorageSystemTableEngines.h @@ -19,7 +19,7 @@ public: return "SystemTableEngines"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemTableFunctions.cpp b/src/Storages/System/StorageSystemTableFunctions.cpp index 07a504edc5e..9fb8e11e4d1 100644 --- a/src/Storages/System/StorageSystemTableFunctions.cpp +++ b/src/Storages/System/StorageSystemTableFunctions.cpp @@ -10,14 +10,14 @@ namespace ErrorCodes extern const int UNKNOWN_FUNCTION; } -NamesAndTypesList StorageSystemTableFunctions::getNamesAndTypes() +ColumnsDescription StorageSystemTableFunctions::getColumnsDescription() { - return - { - {"name", std::make_shared()}, - {"description", std::make_shared()}, - {"allow_readonly", std::make_shared()} - }; + return ColumnsDescription + { + {"name", std::make_shared(), "Name of a table function."}, + {"description", std::make_shared(), "Brief description of a table function."}, + {"allow_readonly", std::make_shared(), "Flag that indicated whether a readonly user may use this function."} + }; } void StorageSystemTableFunctions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemTableFunctions.h b/src/Storages/System/StorageSystemTableFunctions.h index a74e2968731..804c3b51940 100644 --- a/src/Storages/System/StorageSystemTableFunctions.h +++ b/src/Storages/System/StorageSystemTableFunctions.h @@ -19,7 +19,7 @@ public: return "SystemTableFunctions"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index d2c01ec3dea..92ae643db55 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -33,42 +33,71 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription({ - {"database", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"engine", std::make_shared()}, - {"is_temporary", std::make_shared()}, - {"data_paths", std::make_shared(std::make_shared())}, - {"metadata_path", std::make_shared()}, - {"metadata_modification_time", std::make_shared()}, - {"dependencies_database", std::make_shared(std::make_shared())}, - {"dependencies_table", std::make_shared(std::make_shared())}, - {"create_table_query", std::make_shared()}, - {"engine_full", std::make_shared()}, - {"as_select", std::make_shared()}, - {"partition_key", std::make_shared()}, - {"sorting_key", std::make_shared()}, - {"primary_key", std::make_shared()}, - {"sampling_key", std::make_shared()}, - {"storage_policy", std::make_shared()}, - {"total_rows", std::make_shared(std::make_shared())}, - {"total_bytes", std::make_shared(std::make_shared())}, - {"total_bytes_uncompressed", std::make_shared(std::make_shared())}, + + auto description = ColumnsDescription{ + {"database", std::make_shared(), "The name of the database the table is in."}, + {"name", std::make_shared(), "Table name."}, + {"uuid", std::make_shared(), "Table uuid (Atomic database)."}, + {"engine", 
std::make_shared(), "Table engine name (without parameters)."}, + {"is_temporary", std::make_shared(), "Flag that indicates whether the table is temporary."}, + {"data_paths", std::make_shared(std::make_shared()), "Paths to the table data in the file systems."}, + {"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, + {"metadata_modification_time", std::make_shared(), "Time of latest modification of the table metadata."}, + {"dependencies_database", std::make_shared(std::make_shared()), "Database dependencies."}, + {"dependencies_table", std::make_shared(std::make_shared()), "Table dependencies (materialized views the current table)."}, + {"create_table_query", std::make_shared(), "The query that was used to create the table."}, + {"engine_full", std::make_shared(), "Parameters of the table engine."}, + {"as_select", std::make_shared(), "SELECT query for view."}, + {"partition_key", std::make_shared(), "The partition key expression specified in the table."}, + {"sorting_key", std::make_shared(), "The sorting key expression specified in the table."}, + {"primary_key", std::make_shared(), "The primary key expression specified in the table."}, + {"sampling_key", std::make_shared(), "The sampling key expression specified in the table."}, + {"storage_policy", std::make_shared(), "The storage policy."}, + {"total_rows", std::make_shared(std::make_shared()), + "Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise NULL (including underlying Buffer table)." + }, + {"total_bytes", std::make_shared(std::make_shared()), + "Total number of bytes, if it is possible to quickly determine exact number " + "of bytes for the table on storage, otherwise NULL (does not includes any underlying storage). " + "If the table stores data on disk, returns used space on disk (i.e. compressed). " + "If the table stores data in memory, returns approximated number of used bytes in memory." + }, + {"total_bytes_uncompressed", std::make_shared(std::make_shared()), + "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " + "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into account)." + }, {"parts", std::make_shared(std::make_shared())}, {"active_parts", std::make_shared(std::make_shared())}, {"total_marks", std::make_shared(std::make_shared())}, - {"lifetime_rows", std::make_shared(std::make_shared())}, - {"lifetime_bytes", std::make_shared(std::make_shared())}, - {"comment", std::make_shared()}, - {"has_own_data", std::make_shared()}, - {"loading_dependencies_database", std::make_shared(std::make_shared())}, - {"loading_dependencies_table", std::make_shared(std::make_shared())}, - {"loading_dependent_database", std::make_shared(std::make_shared())}, - {"loading_dependent_table", std::make_shared(std::make_shared())}, - }, { + {"lifetime_rows", std::make_shared(std::make_shared()), + "Total number of rows INSERTed since server start (only for Buffer tables)." + }, + {"lifetime_bytes", std::make_shared(std::make_shared()), + "Total number of bytes INSERTed since server start (only for Buffer tables)." + }, + {"comment", std::make_shared(), "The comment for the table."}, + {"has_own_data", std::make_shared(), + "Flag that indicates whether the table itself stores some data on disk or only accesses some other source." 
+ }, + {"loading_dependencies_database", std::make_shared(std::make_shared()), + "Database loading dependencies (list of objects which should be loaded before the current object)." + }, + {"loading_dependencies_table", std::make_shared(std::make_shared()), + "Table loading dependencies (list of objects which should be loaded before the current object)." + }, + {"loading_dependent_database", std::make_shared(std::make_shared()), + "Dependent loading database." + }, + {"loading_dependent_table", std::make_shared(std::make_shared()), + "Dependent loading table." + }, + }; + + description.setAliases({ {"table", std::make_shared(), "name"} - })); + }); + + storage_metadata.setColumns(std::move(description)); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index e0d7d2a5c42..14f4ce0f5de 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -8,11 +8,11 @@ extern const char * auto_time_zones[]; namespace DB { -NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes() +ColumnsDescription StorageSystemTimeZones::getColumnsDescription() { - return + return ColumnsDescription { - {"time_zone", std::make_shared()}, + {"time_zone", std::make_shared(), "List of supported time zones."}, }; } diff --git a/src/Storages/System/StorageSystemTimeZones.h b/src/Storages/System/StorageSystemTimeZones.h index cd54470b07c..f3743a1ef09 100644 --- a/src/Storages/System/StorageSystemTimeZones.h +++ b/src/Storages/System/StorageSystemTimeZones.h @@ -20,6 +20,6 @@ public: public: std::string getName() const override { return "SystemTimeZones"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp index 21fa72ea12a..47e44688c14 100644 --- a/src/Storages/System/StorageSystemTransactions.cpp +++ b/src/Storages/System/StorageSystemTransactions.cpp @@ -21,9 +21,11 @@ static DataTypePtr getStateEnumType() }); } -NamesAndTypesList StorageSystemTransactions::getNamesAndTypes() +ColumnsDescription StorageSystemTransactions::getColumnsDescription() { - return { + /// TODO: Fill in all the comments. 
+ return ColumnsDescription + { {"tid", getTransactionIDDataType()}, {"tid_hash", std::make_shared()}, {"elapsed", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemTransactions.h b/src/Storages/System/StorageSystemTransactions.h index 128acd04367..b5a538b7b55 100644 --- a/src/Storages/System/StorageSystemTransactions.h +++ b/src/Storages/System/StorageSystemTransactions.h @@ -13,9 +13,7 @@ class StorageSystemTransactions final : public IStorageSystemOneBlock()}, {"type", std::make_shared()}, {"params", std::make_shared()}, {"precedence", std::make_shared()}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemUserDirectories.h b/src/Storages/System/StorageSystemUserDirectories.h index 0c5f4f14c7e..bca6a9b5aa6 100644 --- a/src/Storages/System/StorageSystemUserDirectories.h +++ b/src/Storages/System/StorageSystemUserDirectories.h @@ -12,7 +12,7 @@ class StorageSystemUserDirectories final : public IStorageSystemOneBlock()}, {"memory_usage", std::make_shared()}, {"peak_memory_usage", std::make_shared()}, {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, }; -} -NamesAndAliases StorageSystemUserProcesses::getNamesAndAliases() -{ - return { + description.setAliases({ {"ProfileEvents.Names", {std::make_shared(std::make_shared())}, "mapKeys(ProfileEvents)"}, - {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"}}; + {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"} + }); + + return description; } void StorageSystemUserProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemUserProcesses.h b/src/Storages/System/StorageSystemUserProcesses.h index 9bdc009d849..6eb12e30559 100644 --- a/src/Storages/System/StorageSystemUserProcesses.h +++ b/src/Storages/System/StorageSystemUserProcesses.h @@ -16,9 +16,7 @@ class StorageSystemUserProcesses final : public IStorageSystemOneBlock()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, - {"auth_type", std::make_shared(getAuthenticationTypeEnumValues())}, - {"auth_params", std::make_shared()}, - {"host_ip", std::make_shared(std::make_shared())}, - {"host_names", std::make_shared(std::make_shared())}, - {"host_names_regexp", std::make_shared(std::make_shared())}, - {"host_names_like", std::make_shared(std::make_shared())}, - {"default_roles_all", std::make_shared()}, - {"default_roles_list", std::make_shared(std::make_shared())}, - {"default_roles_except", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "User name."}, + {"id", std::make_shared(), "User ID."}, + {"storage", std::make_shared(), "Path to the storage of users. Configured in the access_control_path parameter."}, + {"auth_type", std::make_shared(getAuthenticationTypeEnumValues()), + "Shows the authentication type. " + "There are multiple ways of user identification: " + "with no password, with plain text password, with SHA256-encoded password, " + "with double SHA-1-encoded password or with bcrypt-encoded password." + }, + {"auth_params", std::make_shared(), "Authentication parameters in the JSON format depending on the auth_type."}, + {"host_ip", std::make_shared(std::make_shared()), + "IP addresses of hosts that are allowed to connect to the ClickHouse server." + }, + {"host_names", std::make_shared(std::make_shared()), + "Names of hosts that are allowed to connect to the ClickHouse server." 
+ }, + {"host_names_regexp", std::make_shared(std::make_shared()), + "Regular expression for host names that are allowed to connect to the ClickHouse server." + }, + {"host_names_like", std::make_shared(std::make_shared()), + "Names of hosts that are allowed to connect to the ClickHouse server, set using the LIKE predicate." + }, + {"default_roles_all", std::make_shared(), + "Shows whether all granted roles are set for the user by default." + }, + {"default_roles_list", std::make_shared(std::make_shared()), + "List of granted roles provided by default." + }, + {"default_roles_except", std::make_shared(std::make_shared()), + "All the granted roles set as default except the listed ones." + }, {"grantees_any", std::make_shared()}, {"grantees_list", std::make_shared(std::make_shared())}, {"grantees_except", std::make_shared(std::make_shared())}, {"default_database", std::make_shared()}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemUsers.h b/src/Storages/System/StorageSystemUsers.h index 536f0482480..cfa5947d370 100644 --- a/src/Storages/System/StorageSystemUsers.h +++ b/src/Storages/System/StorageSystemUsers.h @@ -12,7 +12,7 @@ class StorageSystemUsers final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index f27cb3147c8..d2b933e65a8 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -13,25 +13,33 @@ namespace DB { -NamesAndTypesList StorageSystemViewRefreshes::getNamesAndTypes() +ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() { - return { - {"database", std::make_shared()}, - {"view", std::make_shared()}, - {"status", std::make_shared()}, - {"last_refresh_result", std::make_shared()}, - {"last_refresh_time", std::make_shared(std::make_shared())}, - {"last_success_time", std::make_shared(std::make_shared())}, - {"duration_ms", std::make_shared()}, - {"next_refresh_time", std::make_shared()}, - {"remaining_dependencies", std::make_shared(std::make_shared())}, - {"exception", std::make_shared()}, - {"refresh_count", std::make_shared()}, - {"progress", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "The name of the database the table is in."}, + {"view", std::make_shared(), "Table name."}, + {"status", std::make_shared(), "Current state of the refresh."}, + {"last_refresh_result", std::make_shared(), "Outcome of the latest refresh attempt."}, + {"last_refresh_time", std::make_shared(std::make_shared()), + "Time of the last refresh attempt. NULL if no refresh attempts happened since server startup or table creation."}, + {"last_success_time", std::make_shared(std::make_shared()), + "Time of the last successful refresh. NULL if no successful refreshes happened since server startup or table creation."}, + {"duration_ms", std::make_shared(), "How long the last refresh attempt took."}, + {"next_refresh_time", std::make_shared(), "Time at which the next refresh is scheduled to start."}, + {"remaining_dependencies", std::make_shared(std::make_shared()), + "If the view has refresh dependencies, this array contains the subset of those dependencies that are not satisfied for the current refresh yet. 
" + "If status = 'WaitingForDependencies', a refresh is ready to start as soon as these dependencies are fulfilled." + }, + {"exception", std::make_shared(), + "if last_refresh_result = 'Exception', i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace." + }, + {"refresh_count", std::make_shared(), "Number of successful refreshes since last server restart or table creation."}, + {"progress", std::make_shared(), "Progress of the current refresh, between 0 and 1."}, {"elapsed", std::make_shared()}, - {"read_rows", std::make_shared()}, + {"read_rows", std::make_shared(), "Number of rows read by the current refresh so far."}, {"read_bytes", std::make_shared()}, - {"total_rows", std::make_shared()}, + {"total_rows", std::make_shared(), "Estimated total number of rows that need to be read by the current refresh."}, {"total_bytes", std::make_shared()}, {"written_rows", std::make_shared()}, {"written_bytes", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemViewRefreshes.h b/src/Storages/System/StorageSystemViewRefreshes.h index 475ad45e68f..02d3a39dfff 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.h +++ b/src/Storages/System/StorageSystemViewRefreshes.h @@ -15,7 +15,7 @@ class StorageSystemViewRefreshes final : public IStorageSystemOneBlock()}, + return ColumnsDescription + { + {"message", std::make_shared(), "A warning message issued by ClickHouse server."}, }; } diff --git a/src/Storages/System/StorageSystemWarnings.h b/src/Storages/System/StorageSystemWarnings.h index 3f403f5f1f9..42948a765ea 100644 --- a/src/Storages/System/StorageSystemWarnings.h +++ b/src/Storages/System/StorageSystemWarnings.h @@ -16,7 +16,7 @@ class StorageSystemWarnings final : public IStorageSystemOneBlock()}})); + storage_metadata.setColumns(ColumnsDescription({{"zero", std::make_shared(), "dummy"}})); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index ba069380855..37fe9074950 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -198,17 +198,7 @@ StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - ColumnsDescription desc; - auto columns = getNamesAndTypes(); - for (const auto & col : columns) - { - ColumnDescription col_desc(col.name, col.type); - /// We only allow column `name`, `path`, `value` to insert. 
- if (col.name != "name" && col.name != "path" && col.name != "value") - col_desc.default_desc.kind = ColumnDefaultKind::Materialized; - desc.add(col_desc); - } - storage_metadata.setColumns(desc); + storage_metadata.setColumns(getColumnsDescription()); setInMemoryMetadata(storage_metadata); } @@ -238,24 +228,37 @@ SinkToStoragePtr StorageSystemZooKeeper::write(const ASTPtr &, const StorageMeta return std::make_shared(write_header, context); } -NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes() +ColumnsDescription StorageSystemZooKeeper::getColumnsDescription() { - return { - { "name", std::make_shared() }, - { "value", std::make_shared() }, - { "czxid", std::make_shared() }, - { "mzxid", std::make_shared() }, - { "ctime", std::make_shared() }, - { "mtime", std::make_shared() }, - { "version", std::make_shared() }, - { "cversion", std::make_shared() }, - { "aversion", std::make_shared() }, - { "ephemeralOwner", std::make_shared() }, - { "dataLength", std::make_shared() }, - { "numChildren", std::make_shared() }, - { "pzxid", std::make_shared() }, - { "path", std::make_shared() }, + auto description = ColumnsDescription + { + {"name", std::make_shared(), "The name of the node."}, + {"value", std::make_shared(), "Node value."}, + {"czxid", std::make_shared(), "ID of the transaction that created the node."}, + {"mzxid", std::make_shared(), "ID of the transaction that last changed the node."}, + {"ctime", std::make_shared(), "Time of node creation."}, + {"mtime", std::make_shared(), "Time of the last modification of the node."}, + {"version", std::make_shared(), "Node version: the number of times the node was changed."}, + {"cversion", std::make_shared(), "Number of added or removed descendants."}, + {"aversion", std::make_shared(), "Number of changes to the ACL."}, + {"ephemeralOwner", std::make_shared(), "For ephemeral nodes, the ID of the session that owns this node."}, + {"dataLength", std::make_shared(), "Size of the value."}, + {"numChildren", std::make_shared(), "Number of descendants."}, + {"pzxid", std::make_shared(), "ID of the transaction that last deleted or added descendants."}, + {"path", std::make_shared(), "The path to the node."}, }; + + for (auto & name : description.getAllRegisteredNames()) + { + description.modify(name, [&](ColumnDescription & column) + { + /// We only allow column `name`, `path`, `value` to insert. 
+ if (column.name != "name" && column.name != "path" && column.name != "value") + column.default_desc.kind = ColumnDefaultKind::Materialized; + }); + } + + return description; } static String pathCorrected(const String & path) diff --git a/src/Storages/System/StorageSystemZooKeeper.h b/src/Storages/System/StorageSystemZooKeeper.h index 7f7aba862a2..3c893a2fddc 100644 --- a/src/Storages/System/StorageSystemZooKeeper.h +++ b/src/Storages/System/StorageSystemZooKeeper.h @@ -18,7 +18,7 @@ public: std::string getName() const override { return "SystemZooKeeper"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr /*context*/, bool /*async_insert*/) override; diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 9d9a8763db2..c165bfa217d 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -11,7 +11,7 @@ namespace DB { -NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() +ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() { DataTypeEnum16::Values feature_flags_enum_values; feature_flags_enum_values.reserve(magic_enum::enum_count()); @@ -20,18 +20,21 @@ NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() auto feature_flags_enum = std::make_shared(std::move(feature_flags_enum_values)); - return { - /* 0 */ {"name", std::make_shared()}, - /* 1 */ {"host", std::make_shared()}, - /* 2 */ {"port", std::make_shared()}, - /* 3 */ {"index", std::make_shared()}, - /* 4 */ {"connected_time", std::make_shared()}, - /* 5 */ {"session_uptime_elapsed_seconds", std::make_shared()}, - /* 6 */ {"is_expired", std::make_shared()}, - /* 7 */ {"keeper_api_version", std::make_shared()}, - /* 8 */ {"client_id", std::make_shared()}, - /* 9 */ {"xid", std::make_shared()}, - /* 10*/ {"enabled_feature_flags", std::make_shared(std::move(feature_flags_enum))} + return ColumnsDescription + { + /* 0 */ {"name", std::make_shared(), "ZooKeeper cluster's name."}, + /* 1 */ {"host", std::make_shared(), "The hostname/IP of the ZooKeeper node that ClickHouse connected to."}, + /* 2 */ {"port", std::make_shared(), "The port of the ZooKeeper node that ClickHouse connected to."}, + /* 3 */ {"index", std::make_shared(), "The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config."}, + /* 4 */ {"connected_time", std::make_shared(), "When the connection was established."}, + /* 5 */ {"session_uptime_elapsed_seconds", std::make_shared(), "Seconds elapsed since the connection was established."}, + /* 6 */ {"is_expired", std::make_shared(), "Is the current connection expired."}, + /* 7 */ {"keeper_api_version", std::make_shared(), "Keeper API version."}, + /* 8 */ {"client_id", std::make_shared(), "Session id of the connection."}, + /* 9 */ {"xid", std::make_shared(), "XID of the current session."}, + /* 10*/ {"enabled_feature_flags", std::make_shared(std::move(feature_flags_enum)), + "Feature flags which are enabled. Only applicable to ClickHouse Keeper." 
+ } }; } diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.h b/src/Storages/System/StorageSystemZooKeeperConnection.h index dd4c293c112..2b6d3b2e516 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.h +++ b/src/Storages/System/StorageSystemZooKeeperConnection.h @@ -16,7 +16,7 @@ class StorageSystemZooKeeperConnection final : public IStorageSystemOneBlock #include #include +#include #include #include #include @@ -106,122 +107,130 @@ #include #endif +#if USE_MYSQL +#include +#endif + namespace DB { void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, bool has_zookeeper) { - attach(context, system_database, "one"); - attach(context, system_database, "numbers", false); - attach(context, system_database, "numbers_mt", true); - attach(context, system_database, "zeros", false); - attach(context, system_database, "zeros_mt", true); - attach(context, system_database, "databases"); - attach(context, system_database, "tables"); - attach(context, system_database, "columns"); - attach(context, system_database, "functions"); - attach(context, system_database, "events"); - attach(context, system_database, "settings"); - attach(context, system_database, "server_settings"); - attach(context, system_database, "settings_changes"); - attach>(context, system_database, "merge_tree_settings"); - attach>(context, system_database, "replicated_merge_tree_settings"); - attach(context, system_database, "build_options"); - attach(context, system_database, "formats"); - attach(context, system_database, "table_functions"); - attach(context, system_database, "aggregate_function_combinators"); - attach(context, system_database, "data_type_families"); - attach(context, system_database, "collations"); - attach(context, system_database, "database_engines"); - attach(context, system_database, "table_engines"); - attach(context, system_database, "contributors"); - attach(context, system_database, "users"); - attach(context, system_database, "roles"); - attach(context, system_database, "grants"); - attach(context, system_database, "role_grants"); - attach(context, system_database, "current_roles"); - attach(context, system_database, "enabled_roles"); - attach(context, system_database, "settings_profiles"); - attach(context, system_database, "settings_profile_elements"); - attach(context, system_database, "row_policies"); - attach(context, system_database, "quotas"); - attach(context, system_database, "quota_limits"); - attach(context, system_database, "quota_usage"); - attach(context, system_database, "quotas_usage"); - attach(context, system_database, "user_directories"); - attach(context, system_database, "privileges"); - attach(context, system_database, "errors"); - attach(context, system_database, "warnings"); - attach(context, system_database, "data_skipping_indices"); - attach(context, system_database, "licenses"); - attach(context, system_database, "time_zones"); - attach(context, system_database, "backups"); - attach(context, system_database, "schema_inference_cache"); - attach(context, system_database, "dropped_tables"); - attach(context, system_database, "scheduler"); + attach(context, system_database, "one", "This table contains a single row with a single dummy UInt8 column containing the value 0. 
Used when the table is not specified explicitly, for example in queries like `SELECT 1`."); + attach(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false); + attach(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. Numbers order is not guaranteed.", true); + attach(context, system_database, "zeros", "Produces unlimited number of non-materialized zeros.", false); + attach(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); + attach(context, system_database, "databases", "Lists all databases of the current server."); + attach(context, system_database, "tables", "Lists all tables of the current server."); + attach(context, system_database, "columns", "Lists all columns from all tables of the current server."); + attach(context, system_database, "functions", "Contains a list of all available ordinary and aggregate functions with their descriptions."); + attach(context, system_database, "events", "Contains profiling events and their current value."); + attach(context, system_database, "settings", "Contains a list of all user-level settings (which can be modified in a scope of query or session), their current and default values along with descriptions."); + attach(context, system_database, "server_settings", "Contains a list of all server-wide settings (which are effective only on server startup and usually cannot be modified at runtime), their current and default values along with descriptions."); + attach(context, system_database, "settings_changes", "Contains the information about the settings changes through different ClickHouse versions. You may make ClickHouse behave like a particular previous version by changing the `compatibility` user-level settings."); + attach>(context, system_database, "merge_tree_settings", "Contains a list of all MergeTree engine specific settings, their current and default values along with descriptions. You may change any of them in SETTINGS section in CREATE query."); + attach>(context, system_database, "replicated_merge_tree_settings", "Contains a list of all ReplicatedMergeTree engine specific settings, their current and default values along with descriptions. You may change any of them in SETTINGS section in CREATE query. 
"); + attach(context, system_database, "build_options", "Contains a list of all build flags, compiler options and commit hash for used build."); + attach(context, system_database, "formats", "Contains a list of all the formats along with flags whether a format is suitable for input/output or whether it supports parallelization."); + attach(context, system_database, "table_functions", "Contains a list of all available table functions with their descriptions."); + attach(context, system_database, "aggregate_function_combinators", "Contains a list of all available aggregate function combinators, which could be applied to aggregate functions and change the way they work."); + attach(context, system_database, "data_type_families", "Contains a list of all available native data types along with all the aliases used for compatibility with other DBMS."); + attach(context, system_database, "collations", "Contains a list of all available collations for alphabetical comparison of strings."); + attach(context, system_database, "database_engines", "Contains a list of all available database engines"); + attach(context, system_database, "table_engines", "Contains a list of all available table engines along with information whether a particular table engine supports some specific features (e.g. settings, skipping indices, projections, replication, TTL, deduplication, parallel insert, etc.)"); + attach(context, system_database, "contributors", "Contains a list of all ClickHouse contributors <3"); + attach(context, system_database, "users", "Contains a list of all users profiles either configured at the server through a configuration file or created via SQL."); + attach(context, system_database, "roles", "Contains a list of all roles created at the server."); + attach(context, system_database, "grants", "Contains the information about privileges granted to ClickHouse user accounts."); + attach(context, system_database, "role_grants", "Contains the role grants for users and roles. To add entries to this table, use `GRANT role TO user`. Using this table you may find out which roles are assigned to which users or which roles a user has."); + attach(context, system_database, "current_roles", "Contains active roles of a current user. SET ROLE changes the contents of this table."); + attach(context, system_database, "enabled_roles", "Contains all active roles at the moment, including current role of the current user and granted roles for current role."); + attach(context, system_database, "settings_profiles", "Contains properties of configured setting profiles."); + attach(context, system_database, "settings_profile_elements", "Describes the content of each settings profile configured on the server. Including settings constraints, roles and users for which the settings are applied, and parent settings profiles."); + attach(context, system_database, "row_policies", "Contains filters for one particular table, as well as a list of roles and/or users which should use this row policy."); + attach(context, system_database, "quotas", "Contains information about quotas."); + attach(context, system_database, "quota_limits", "Contains information about maximums for all intervals of all quotas. 
Any number of rows or zero can correspond to a specific quota."); + attach(context, system_database, "quota_usage", "Contains quota usage by the current user: how much is used and how much is left."); + attach(context, system_database, "quotas_usage", "Contains quota usage by all users."); + attach(context, system_database, "user_directories", "Contains the information about configured user directories - directories on the file system from which ClickHouse server is allowed to read user provided data."); + attach(context, system_database, "privileges", "Contains a list of all available privileges that could be granted to a user or role."); + attach(context, system_database, "errors", "Contains a list of all errors which have ever happened including the error code, last time and message with unsymbolized stacktrace."); + attach(context, system_database, "warnings", "Contains warnings about server configuration to be displayed by clickhouse-client right after it connects to the server."); + attach(context, system_database, "data_skipping_indices", "Contains all the information about the data skipping indices in tables, similar to system.columns."); + attach(context, system_database, "licenses", "Contains licenses of third-party libraries that are located in the contrib directory of ClickHouse sources."); + attach(context, system_database, "time_zones", "Contains a list of time zones that are supported by the ClickHouse server. This list of timezones might vary depending on the version of ClickHouse."); + attach(context, system_database, "backups", "Contains a list of all BACKUP or RESTORE operations with their current states and other properties. Note that the table is not persistent and shows only operations executed after the last server restart."); + attach(context, system_database, "schema_inference_cache", "Contains information about all cached file schemas."); + attach(context, system_database, "dropped_tables", "Contains a list of tables which were dropped from Atomic databases but not completely removed yet."); + attach(context, system_database, "dropped_tables_parts", "Contains parts of tables from system.dropped_tables."); + attach(context, system_database, "scheduler", "Contains information and status for scheduling nodes residing on the local server."); #if defined(__ELF__) && !defined(OS_FREEBSD) - attach(context, system_database, "symbols"); + attach(context, system_database, "symbols", "Contains information for introspection of ClickHouse binary. This table is only useful for C++ experts and ClickHouse engineers."); #endif #if USE_RDKAFKA - attach(context, system_database, "kafka_consumers"); + attach(context, system_database, "kafka_consumers", "Contains information about Kafka consumers.
Applicable for Kafka table engine (native ClickHouse integration)."); #endif #ifdef OS_LINUX - attach(context, system_database, "stack_trace"); + attach(context, system_database, "stack_trace", "Allows to obtain an unsymbolized stacktrace from all the threads of the server process."); #endif #if USE_ROCKSDB - attach(context, system_database, "rocksdb"); + attach(context, system_database, "rocksdb", "Contains a list of metrics exposed from embedded RocksDB."); +#endif +#if USE_MYSQL + attach(context, system_database, "mysql_binlogs", "Shows a list of active binlogs for MaterializedMySQL."); #endif - attach(context, system_database, "parts"); - attach(context, system_database, "projection_parts"); - attach(context, system_database, "detached_parts"); - attach(context, system_database, "parts_columns"); - attach(context, system_database, "projection_parts_columns"); - attach(context, system_database, "disks"); - attach(context, system_database, "storage_policies"); - attach(context, system_database, "processes"); - attach(context, system_database, "metrics"); - attach(context, system_database, "merges"); - attach(context, system_database, "moves"); - attach(context, system_database, "mutations"); - attach(context, system_database, "replicas"); - attach(context, system_database, "replication_queue"); - attach(context, system_database, "distributed_ddl_queue"); - attach(context, system_database, "distribution_queue"); - attach(context, system_database, "dictionaries"); - attach(context, system_database, "models"); - attach(context, system_database, "clusters"); - attach(context, system_database, "graphite_retentions"); - attach(context, system_database, "macros"); - attach(context, system_database, "replicated_fetches"); - attach(context, system_database, "part_moves_between_shards"); - attach(context, system_database, "asynchronous_inserts"); - attach(context, system_database, "filesystem_cache"); - attach(context, system_database, "query_cache"); - attach(context, system_database, "remote_data_paths"); - attach(context, system_database, "certificates"); - attach(context, system_database, "named_collections"); - attach(context, system_database, "asynchronous_loader"); - attach(context, system_database, "user_processes"); - attach(context, system_database, "jemalloc_bins"); - attach(context, system_database, "s3queue"); - attach(context, system_database, "dashboards"); - attach(context, system_database, "view_refreshes"); + attach(context, system_database, "parts", "Contains a list of currently existing (both active and inactive) parts of all *-MergeTree tables. Each part is represented by a single row."); + attach(context, system_database, "projection_parts", "Contains a list of currently existing projection parts (a copy of some part containing aggregated data or just sorted in different order) created for all the projections for all tables within a cluster."); + attach(context, system_database, "detached_parts", "Contains a list of all parts which are being found in /detached directory along with a reason why it was detached. ClickHouse server doesn't use such parts anyhow."); + attach(context, system_database, "parts_columns", "Contains a list of columns of all currently existing parts of all MergeTree tables. Each column is represented by a single row."); + attach(context, system_database, "projection_parts_columns", "Contains a list of columns of all currently existing projection parts of all MergeTree tables. 
Each column is represented by a single row."); + attach(context, system_database, "disks", "Contains information about disks defined in the server configuration."); + attach(context, system_database, "storage_policies", "Contains information about storage policies and volumes defined in the server configuration."); + attach(context, system_database, "processes", "Contains a list of currently executing processes (queries) with their progress."); + attach(context, system_database, "metrics", "Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date."); + attach(context, system_database, "merges", "Contains a list of merges currently executing merges of MergeTree tables and their progress. Each merge operation is represented by a single row."); + attach(context, system_database, "moves", "Contains information about in-progress data part moves of MergeTree tables. Each data part movement is represented by a single row."); + attach(context, system_database, "mutations", "Contains a list of mutations and their progress. Each mutation command is represented by a single row."); + attach(context, system_database, "replicas", "Contains information and status of all table replicas on current server. Each replica is represented by a single row."); + attach(context, system_database, "replication_queue", "Contains information about tasks from replication queues stored in ClickHouse Keeper, or ZooKeeper, for each table replica."); + attach(context, system_database, "distributed_ddl_queue", "Contains information about distributed DDL queries (ON CLUSTER clause) that were executed on a cluster."); + attach(context, system_database, "distribution_queue", "Contains information about local files that are in the queue to be sent to the shards. 
These local files contain new parts that are created by inserting new data into the Distributed table in asynchronous mode."); + attach(context, system_database, "dictionaries", "Contains information about dictionaries."); + attach(context, system_database, "models", "Contains a list of CatBoost models loaded into a LibraryBridge's memory along with the time when each model was loaded."); + attach(context, system_database, "clusters", "Contains information about clusters defined in the configuration file or generated by a Replicated database."); + attach(context, system_database, "graphite_retentions", "Contains information about parameters graphite_rollup which are used in tables with *GraphiteMergeTree engines."); + attach(context, system_database, "macros", "Contains a list of all macros defined in server configuration."); + attach(context, system_database, "replicated_fetches", "Contains information about currently running background fetches."); + attach(context, system_database, "part_moves_between_shards", "Contains information about parts which are currently in the process of moving between shards and their progress."); + attach(context, system_database, "asynchronous_inserts", "Contains information about pending asynchronous inserts in queue in server's memory."); + attach(context, system_database, "filesystem_cache", "Contains information about all entries inside filesystem cache for remote objects."); + attach(context, system_database, "query_cache", "Contains information about all entries inside query cache in server's memory."); + attach(context, system_database, "remote_data_paths", "Contains a mapping from a filename on local filesystem to a blob name inside object storage."); + attach(context, system_database, "certificates", "Contains information about available certificates and their sources."); + attach(context, system_database, "named_collections", "Contains a list of all named collections which were created via SQL query or parsed from configuration file."); + attach(context, system_database, "asynchronous_loader", "Contains information and status for recent asynchronous jobs (e.g. for tables loading). The table contains a row for every job."); + attach(context, system_database, "user_processes", "This system table can be used to get an overview of memory usage and ProfileEvents of users."); + attach(context, system_database, "jemalloc_bins", "Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. These statistics might not be absolutely accurate because of thread local caching in jemalloc."); + attach(context, system_database, "s3queue", "Contains in-memory state of S3Queue metadata and currently processed rows per file."); + attach(context, system_database, "dashboards", "Contains queries used by the /dashboard page accessible through the HTTP interface. This table can be useful for monitoring and troubleshooting. The table contains a row for every chart in a dashboard."); + attach(context, system_database, "view_refreshes", "Lists all Refreshable Materialized Views of the current server."); if (has_zookeeper) { - attach(context, system_database, "zookeeper"); - attach(context, system_database, "zookeeper_connection"); + attach(context, system_database, "zookeeper", "Exposes data from the [Zoo]Keeper cluster defined in the config.
Allows getting the list of children for a particular node or reading the value written inside it."); + attach(context, system_database, "zookeeper_connection", "Shows information about the current connections to [Zoo]Keeper (including auxiliary [Zoo]Keepers)."); } if (context->getConfigRef().getInt("allow_experimental_transactions", 0)) - attach(context, system_database, "transactions"); + attach(context, system_database, "transactions", "Contains a list of transactions and their state."); } void attachSystemTablesAsync(ContextPtr context, IDatabase & system_database, AsynchronousMetrics & async_metrics) { - attach(context, system_database, "asynchronous_metrics", async_metrics); + attach(context, system_database, "asynchronous_metrics", "Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.", async_metrics); } } diff --git a/src/Storages/System/attachSystemTablesImpl.h b/src/Storages/System/attachSystemTablesImpl.h index a1fae985d92..0b0a22baa13 100644 --- a/src/Storages/System/attachSystemTablesImpl.h +++ b/src/Storages/System/attachSystemTablesImpl.h @@ -2,12 +2,13 @@ #include #include +#include namespace DB { template -void attach(ContextPtr context, IDatabase & system_database, const String & table_name, StorageArgs && ... args) +void attach(ContextPtr context, IDatabase & system_database, const String & table_name, const String & comment, StorageArgs && ... args) { assert(system_database.getDatabaseName() == DatabaseCatalog::SYSTEM_DATABASE); if (system_database.getUUID() == UUIDHelpers::Nil) @@ -25,6 +26,13 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl DatabaseCatalog::instance().addUUIDMapping(table_id.uuid); String path = "store/" + DatabaseCatalog::getPathForUUID(table_id.uuid); system_database.attachTable(context, table_name, std::make_shared(table_id, std::forward(args)...), path); + + /// Set the comment + auto table = DatabaseCatalog::instance().getTable(table_id, context); + assert(table); + auto metadata = table->getInMemoryMetadata(); + metadata.comment = comment; + table->setInMemoryMetadata(metadata); } } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 41a222525bf..b3f5d181d5d 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -293,7 +293,7 @@ TTLDescription TTLDescription::getTTLFromAST( { result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec); + ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec); } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 46c38ffa129..12fd7580639 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -54,6 +53,7 @@ #include #include #include +#include #include @@ -63,6 +63,7 @@ #include #include + namespace DB { namespace ErrorCodes @@ -1268,7 +1269,7 @@ ASTPtr StorageWindowView::initInnerQuery(ASTSelectQuery query, ContextPtr contex if (is_time_column_func_now) window_id_name =
func_now_data.window_id_name; - window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), is_tumble ? "tumble" : "hop"); + window_column_name = boost::replace_all_copy(window_id_name, "windowID", is_tumble ? "tumble" : "hop"); /// Parse final query (same as mergeable query but has tumble/hop instead of windowID) final_query = mergeable_query->clone(); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 9358abdf7f8..7de2d5a9fdd 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -129,6 +129,9 @@ endif() if (TARGET ch_contrib::sqids) set(USE_SQIDS 1) endif() +if (TARGET ch_contrib::idna) + set(USE_IDNA 1) +endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 735094df78b..4643d109c3d 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -6,7 +6,6 @@ 01155_rename_move_materialized_view 01214_test_storage_merge_aliases_with_where 01244_optimize_distributed_group_by_sharding_key -01268_mv_scalars 01268_shard_avgweighted 01495_subqueries_in_with_statement 01560_merge_distributed_join @@ -21,21 +20,17 @@ 01925_test_storage_merge_aliases 01952_optimize_distributed_group_by_sharding_key 02174_cte_scalar_cache_mv -02352_grouby_shadows_arg 02354_annoy 02428_parameterized_view -02479_race_condition_between_insert_and_droppin_mv 02493_inconsistent_hex_and_binary_number 02575_merge_prewhere_different_default_kind 00917_multiple_joins_denny_crane -00636_partition_key_parts_pruning -02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET -02404_memory_bound_merging 02725_agg_projection_resprect_PK 02763_row_policy_storage_merge_alias 02784_parallel_replicas_automatic_decision_join 02818_parameterized_view_with_cte_multiple_usage -02815_range_dict_no_direct_join # Flaky. 
Please don't delete them without fixing them: -01600_parts_states_metrics_long 01287_max_execution_speed +02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET +02404_memory_bound_merging +02479_race_condition_between_insert_and_droppin_mv diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 895a12313da..e3319fe4a72 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -from enum import Enum import logging from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from dataclasses import dataclass, field +from enum import Enum from pathlib import Path from typing import Callable, Dict, Iterable, List, Literal, Optional, Union @@ -47,7 +47,7 @@ class JobConfig: @num_batches - sets number of batches for multi-batch job """ - digest: DigestConfig = DigestConfig() + digest: DigestConfig = field(default_factory=DigestConfig) run_command: str = "" timeout: Optional[int] = None num_batches: int = 1 @@ -67,30 +67,32 @@ class BuildConfig: sparse_checkout: bool = False comment: str = "" static_binary_name: str = "" - job_config: JobConfig = JobConfig( - digest=DigestConfig( - include_paths=[ - "./src", - "./contrib/*-cmake", - "./contrib/consistent-hashing", - "./contrib/murmurhash", - "./contrib/libfarmhash", - "./contrib/pdqsort", - "./contrib/cityhash102", - "./contrib/sparse-checkout", - "./contrib/libmetrohash", - "./contrib/update-submodules.sh", - "./contrib/CMakeLists.txt", - "./cmake", - "./base", - "./programs", - "./packages", - "./docker/packager/packager", - ], - exclude_files=[".md"], - docker=["clickhouse/binary-builder"], - git_submodules=True, - ), + job_config: JobConfig = field( + default_factory=lambda: JobConfig( + digest=DigestConfig( + include_paths=[ + "./src", + "./contrib/*-cmake", + "./contrib/consistent-hashing", + "./contrib/murmurhash", + "./contrib/libfarmhash", + "./contrib/pdqsort", + "./contrib/cityhash102", + "./contrib/sparse-checkout", + "./contrib/libmetrohash", + "./contrib/update-submodules.sh", + "./contrib/CMakeLists.txt", + "./cmake", + "./base", + "./programs", + "./packages", + "./docker/packager/packager", + ], + exclude_files=[".md"], + docker=["clickhouse/binary-builder"], + git_submodules=True, + ), + ) ) def export_env(self, export: bool = False) -> str: @@ -107,14 +109,14 @@ class BuildConfig: @dataclass class BuildReportConfig: builds: List[str] - job_config: JobConfig = JobConfig() + job_config: JobConfig = field(default_factory=JobConfig) @dataclass class TestConfig: required_build: str force_tests: bool = False - job_config: JobConfig = JobConfig() + job_config: JobConfig = field(default_factory=JobConfig) BuildConfigs = Dict[str, BuildConfig] diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 4d81161b6de..89fcb9ce350 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -55,6 +55,7 @@ def get_additional_envs( result.append("USE_PARALLEL_REPLICAS=1") if "s3 storage" in check_name: result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + result.append("RANDOMIZE_OBJECT_KEY_TYPE=1") if "analyzer" in check_name: result.append("USE_NEW_ANALYZER=1") diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 26318b55d15..3781cdc5cc8 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -6,6 +6,7 @@ from typing import Any, Dict, List, Optional, Union import boto3 # type: ignore from github import Github from github.AuthenticatedUser import AuthenticatedUser 
+from github.GithubException import BadCredentialsException from github.NamedUser import NamedUser @@ -68,12 +69,20 @@ def get_best_robot_token(tokens_path: str = "/github-tokens") -> str: } assert tokens - for value in tokens.values(): + for name, value in tokens.items(): gh = Github(value, per_page=100) - # Do not spend additional request to API by accessin user.login unless - # the token is chosen by the remaining requests number - user = gh.get_user() - rest, _ = gh.rate_limiting + try: + # Do not spend additional request to API by accessin user.login unless + # the token is chosen by the remaining requests number + user = gh.get_user() + rest, _ = gh.rate_limiting + except BadCredentialsException: + logging.error( + "The token %(name)s has expired, please update it\n" + "::error::Token %(name)s has expired, it must be updated", + {"name": name}, + ) + continue logging.info("Get token with %s remaining requests", rest) if ROBOT_TOKEN is None: ROBOT_TOKEN = Token(user, value, rest) diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 6de0614541a..49699b7d2fd 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -47,6 +47,7 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): result.append("USE_PARALLEL_REPLICAS=1") if "s3 storage" in check_name: result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + result.append("RANDOMIZE_OBJECT_KEY_TYPE=1") if "analyzer" in check_name: result.append("USE_NEW_ANALYZER=1") diff --git a/tests/ci/release.py b/tests/ci/release.py index b5de82b6ca6..f96845dad95 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -10,19 +10,18 @@ On another hand, PyGithub is used for convenient getting commit's status from AP """ -from contextlib import contextmanager -from typing import Any, Iterator, List, Literal, Optional import argparse import json import logging import subprocess +from contextlib import contextmanager +from typing import Any, Final, Iterator, List, Optional, Tuple -from git_helper import commit, release_branch +from git_helper import Git, commit, release_branch from version_helper import ( FILE_WITH_VERSION_PATH, GENERATED_CONTRIBUTORS, ClickHouseVersion, - Git, VersionType, get_abs_path, get_version_from_repo, @@ -61,8 +60,9 @@ class Repo: class Release: - BIG = ("major", "minor") - SMALL = ("patch",) + NEW = "new" # type: Final + PATCH = "patch" # type: Final + VALID_TYPE = (NEW, PATCH) # type: Final[Tuple[str, str]] CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) @@ -70,7 +70,7 @@ class Release: self, repo: Repo, release_commit: str, - release_type: Literal["major", "minor", "patch"], + release_type: str, dry_run: bool, with_stderr: bool, ): @@ -79,7 +79,7 @@ class Release: self.release_commit = release_commit self.dry_run = dry_run self.with_stderr = with_stderr - assert release_type in self.BIG + self.SMALL + assert release_type in self.VALID_TYPE self.release_type = release_type self._git = Git() self._version = get_version_from_repo(git=self._git) @@ -122,7 +122,7 @@ class Release: self.version = get_version_from_repo(git=self._git) def get_stable_release_type(self) -> str: - if self.version.minor % 5 == 3: # our 3 and 8 are LTS + if self.version.is_lts: return VersionType.LTS return VersionType.STABLE @@ -194,19 +194,10 @@ class Release: if check_branch: self.check_branch() - if self.release_type in self.BIG: - if self._version.minor >= 12 and self.release_type != "major": - raise 
ValueError( - "The release type must be 'major' for minor versions>=12" - ) - if self._version.minor < 12 and self.release_type == "major": - raise ValueError( - "The release type must be 'minor' for minor versions<12" - ) - + if self.release_type == self.NEW: with self._checkout(self.release_commit, True): # Checkout to the commit, it will provide the correct current version - with self.testing(): + with self.new_release(): with self.create_release_branch(): logging.info( "Publishing release %s from commit %s is done", @@ -214,9 +205,9 @@ class Release: self.release_commit, ) - elif self.release_type in self.SMALL: + elif self.release_type == self.PATCH: with self._checkout(self.release_commit, True): - with self.stable(): + with self.patch_release(): logging.info( "Publishing release %s from commit %s is done", self.release_version.describe, @@ -244,15 +235,12 @@ class Release: def check_branch(self): branch = self.release_branch - if self.release_type in self.BIG: + if self.release_type == self.NEW: # Commit to spin up the release must belong to a main branch branch = "master" - elif self.release_type not in self.SMALL: + elif self.release_type != self.PATCH: raise ( - ValueError( - f"release_type {self.release_type} neither in {self.BIG} nor " - f"in {self.SMALL}" - ) + ValueError(f"release_type {self.release_type} not in {self.VALID_TYPE}") ) # Prefetch the branch to have it updated @@ -296,6 +284,14 @@ class Release: dry_run=self.dry_run, ) + @property + def bump_part(self) -> ClickHouseVersion.PART_TYPE: + if self.release_type == Release.NEW: + if self._version.minor >= 12: + return "major" + return "minor" + return "patch" + @property def has_rollback(self) -> bool: return bool(self._rollback_stack) @@ -329,13 +325,13 @@ class Release: yield @contextmanager - def stable(self): + def patch_release(self): self.check_no_tags_after() self.read_version() version_type = self.get_stable_release_type() self.version.with_description(version_type) with self._create_gh_release(False): - self.version = self.version.update(self.release_type) + self.version = self.version.update(self.bump_part) self.version.with_description(version_type) self._update_cmake_contributors(self.version) # Checking out the commit of the branch and not the branch itself, @@ -355,14 +351,14 @@ class Release: yield @contextmanager - def testing(self): + def new_release(self): # Create branch for a version bump self.read_version() - self.version = self.version.update(self.release_type) + self.version = self.version.update(self.bump_part) helper_branch = f"{self.version.major}.{self.version.minor}-prepare" with self._create_branch(helper_branch, self.release_commit): with self._checkout(helper_branch, True): - with self._bump_testing_version(helper_branch): + with self._bump_version_in_master(helper_branch): yield @property @@ -432,9 +428,9 @@ class Release: yield @contextmanager - def _bump_testing_version(self, helper_branch: str) -> Iterator[None]: + def _bump_version_in_master(self, helper_branch: str) -> Iterator[None]: self.read_version() - self.version = self.version.update(self.release_type) + self.version = self.version.update(self.bump_part) self.version.with_description(VersionType.TESTING) self._update_cmake_contributors(self.version) self._commit_cmake_contributors(self.version) @@ -447,7 +443,7 @@ class Release: "--label 'do not test' --assignee @me", dry_run=self.dry_run, ) - # Here the testing part is done + # Here the new release part is done yield @contextmanager @@ -598,10 +594,10 @@ def parse_args() -> 
argparse.Namespace: parser.add_argument( "--type", required=True, - choices=Release.BIG + Release.SMALL, + choices=Release.VALID_TYPE, dest="release_type", help="a release type to bump the major.minor.patch version part, " - "new branch is created only for 'major' and 'minor'", + "new branch is created only for the value 'new'", ) parser.add_argument("--with-release-branch", default=True, help=argparse.SUPPRESS) parser.add_argument("--check-dirty", default=True, help=argparse.SUPPRESS) @@ -627,7 +623,7 @@ def parse_args() -> argparse.Namespace: action="store_false", default=argparse.SUPPRESS, help="(debug or development only, dangerous) if set, skip the branch check for " - "a run. By default, 'major' and 'minor' types work only for master, and 'patch' " + "a run. By default, 'new' type work only for master, and 'patch' " "works only for a release branches, that name " "should be the same as '$MAJOR.$MINOR' version, e.g. 22.2", ) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index bc403aa5015..616d645b5a6 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -5,20 +5,19 @@ import shutil import time from multiprocessing.dummy import Pool from pathlib import Path -from typing import List, Union +from typing import Any, List, Union import boto3 # type: ignore import botocore # type: ignore - -from env_helper import ( - S3_TEST_REPORTS_BUCKET, - S3_BUILDS_BUCKET, - RUNNER_TEMP, - CI, - S3_URL, - S3_DOWNLOAD, -) from compress_files import compress_file_fast +from env_helper import ( + CI, + RUNNER_TEMP, + S3_BUILDS_BUCKET, + S3_DOWNLOAD, + S3_TEST_REPORTS_BUCKET, + S3_URL, +) def _flatten_list(lst): @@ -34,11 +33,14 @@ def _flatten_list(lst): class S3Helper: max_pool_size = 100 - def __init__(self): + def __init__(self, client: Any = None, endpoint: str = S3_URL): + self.host = endpoint + if client is not None: + self.client = client + return config = botocore.config.Config(max_pool_connections=self.max_pool_size) - self.session = boto3.session.Session(region_name="us-east-1") - self.client = self.session.client("s3", endpoint_url=S3_URL, config=config) - self.host = S3_URL + session = boto3.session.Session(region_name="us-east-1") + self.client = session.client("s3", endpoint_url=endpoint, config=config) def _upload_file_to_s3( self, bucket_name: str, file_path: Path, s3_path: str @@ -199,6 +201,7 @@ class S3Helper: t = time.time() except Exception as ex: logging.critical("Failed to upload file, expcetion %s", ex) + return "" return self.s3_url(bucket_name, s3_path) p = Pool(self.max_pool_size) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 49a53c9048c..7d582e683e0 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -21,6 +21,7 @@ def get_options(i: int, upgrade_check: bool) -> str: options.append(f'''--db-engine="Replicated('/test/db/test_{i}', 's1', 'r1')"''') client_options.append("allow_experimental_database_replicated=1") client_options.append("enable_deflate_qpl_codec=1") + client_options.append("enable_zstd_qat_codec=1") # If database name is not specified, new database is created for each functional test. # Run some threads with one database for all tests. 
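For context on what the new stress-test client option enable_zstd_qat_codec=1 exercises: a minimal SQL sketch of how a QAT-accelerated ZSTD column codec would be declared, assuming the codec is exposed as ZSTD_QAT and gated behind the enable_zstd_qat_codec setting (the table name, codec level, and software-fallback note below are illustrative assumptions, not taken from this patch):

-- Hypothetical usage sketch; setting and codec names are inferred from this patch.
SET enable_zstd_qat_codec = 1;
CREATE TABLE t_zstd_qat
(
    key UInt64,
    value String CODEC(ZSTD_QAT(1))  -- level 1; assumed to fall back to software ZSTD when no QAT device is present
)
ENGINE = MergeTree
ORDER BY key;

With the option appended to client_options, stress runs would be able to create such tables instead of failing on the gated codec.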
diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index fb046e989a9..30b0c2d96be 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 import logging import os.path as p -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, ArgumentTypeError -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, ArgumentTypeError +from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Tuple, Union -from git_helper import TWEAK, Git as Git, get_tags, git_runner, removeprefix +from git_helper import TWEAK, Git, get_tags, git_runner, removeprefix FILE_WITH_VERSION_PATH = "cmake/autogenerated_versions.txt" CHANGELOG_IN_PATH = "debian/changelog.in" @@ -38,6 +38,8 @@ SET(VERSION_STRING {string}) class ClickHouseVersion: """Immutable version class. On update returns a new instance""" + PART_TYPE = Literal["major", "minor", "patch"] + def __init__( self, major: Union[int, str], @@ -60,7 +62,7 @@ class ClickHouseVersion: self._describe = "" self._description = "" - def update(self, part: Literal["major", "minor", "patch"]) -> "ClickHouseVersion": + def update(self, part: PART_TYPE) -> "ClickHouseVersion": """If part is valid, returns a new version""" if part == "major": return self.major_update() @@ -118,6 +120,7 @@ class ClickHouseVersion: @property def githash(self) -> str: + "returns the CURRENT git SHA1" if self._git is not None: return self._git.sha return "0000000000000000000000000000000000000000" @@ -136,6 +139,11 @@ class ClickHouseVersion: (str(self.major), str(self.minor), str(self.patch), str(self.tweak)) ) + @property + def is_lts(self) -> bool: + """our X.3 and X.8 are LTS""" + return self.minor % 5 == 3 + def as_dict(self) -> VERSIONS: return { "revision": self.revision, @@ -179,6 +187,21 @@ class ClickHouseVersion: def __le__(self, other: "ClickHouseVersion") -> bool: return self == other or self < other + def __hash__(self): + return hash(self.__repr__) + + def __str__(self): + return f"{self.string}" + + def __repr__(self): + return ( + f"" + ) + + +ClickHouseVersions = List[ClickHouseVersion] + class VersionType: LTS = "lts" @@ -265,7 +288,7 @@ def version_arg(version: str) -> ClickHouseVersion: raise ArgumentTypeError(f"version {version} does not match tag of plain version") -def get_tagged_versions() -> List[ClickHouseVersion]: +def get_tagged_versions() -> ClickHouseVersions: versions = [] for tag in get_tags(): try: @@ -276,6 +299,40 @@ def get_tagged_versions() -> List[ClickHouseVersion]: return sorted(versions) +def get_supported_versions( + versions: Optional[Iterable[ClickHouseVersion]] = None, +) -> Set[ClickHouseVersion]: + supported_stable = set() # type: Set[ClickHouseVersion] + supported_lts = set() # type: Set[ClickHouseVersion] + if versions: + versions = list(versions) + else: + # checks that repo is not shallow in background + versions = get_tagged_versions() + versions.sort() + versions.reverse() + for version in versions: + if len(supported_stable) < 3: + if not { + sv + for sv in supported_stable + if version.major == sv.major and version.minor == sv.minor + }: + supported_stable.add(version) + if (version.description == VersionType.LTS or version.is_lts) and len( + supported_lts + ) < 2: + if not { + sv + for sv in supported_lts + if version.major == sv.major and version.minor == sv.minor + }: + supported_lts.add(version) + if len(supported_stable) == 3 and len(supported_lts) == 2: + 
break + return supported_lts.union(supported_stable) + + def update_cmake_version( version: ClickHouseVersion, versions_path: str = FILE_WITH_VERSION_PATH, diff --git a/tests/config/config.d/clusters.xml b/tests/config/config.d/clusters.xml index cfd4868f1dc..7ade716902c 100644 --- a/tests/config/config.d/clusters.xml +++ b/tests/config/config.d/clusters.xml @@ -144,6 +144,24 @@ + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + + 127.0.0.3 + 1234 + + + false diff --git a/tests/config/config.d/s3_storage_policy_with_template_object_key.xml b/tests/config/config.d/s3_storage_policy_with_template_object_key.xml new file mode 100644 index 00000000000..834f5102da1 --- /dev/null +++ b/tests/config/config.d/s3_storage_policy_with_template_object_key.xml @@ -0,0 +1,32 @@ + + + + + s3 + http://localhost:11111/test/ + clickhouse + clickhouse + test + + [a-z]{3}-first-random-part/new-style-prefix/[a-z]{3}/[a-z]{29} + + + cache + 1Gi + cached_s3/ + s3 + + + + + +

cached_s3
+ + + + + + s3 + + cached_s3 + diff --git a/tests/config/config.d/storage_metadata_with_full_object_key.xml b/tests/config/config.d/storage_metadata_with_full_object_key.xml new file mode 100644 index 00000000000..2bb8d49ec4b --- /dev/null +++ b/tests/config/config.d/storage_metadata_with_full_object_key.xml @@ -0,0 +1,5 @@ + + + + 1 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 2f9fd44c9b0..e2782f0a964 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -141,7 +141,26 @@ if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; th fi if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then - ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ + object_key_types_options=("generate-suffix" "generate-full-key" "generate-template-key") + object_key_type="${object_key_types_options[0]}" + + if [[ -n "$RANDOMIZE_OBJECT_KEY_TYPE" ]] && [[ "$RANDOMIZE_OBJECT_KEY_TYPE" -eq 1 ]]; then + object_key_type="${object_key_types_options[$(($RANDOM % ${#object_key_types_options[@]}))]}" + fi + + case $object_key_type in + "generate-full-key") + ln -sf $SRC_PATH/config.d/storage_metadata_with_full_object_key.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ + ;; + "generate-template-key") + ln -sf $SRC_PATH/config.d/storage_metadata_with_full_object_key.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/s3_storage_policy_with_template_object_key.xml $DEST_SERVER_PATH/config.d/s3_storage_policy_by_default.xml + ;; + "generate-suffix"|*) + ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ + ;; + esac fi ARM="aarch64" diff --git a/tests/integration/test_catboost_evaluate/config/logger_library_bridge.xml b/tests/integration/test_catboost_evaluate/config/logger_library_bridge.xml new file mode 100644 index 00000000000..64739e3c7aa --- /dev/null +++ b/tests/integration/test_catboost_evaluate/config/logger_library_bridge.xml @@ -0,0 +1,6 @@ + + + /var/log/clickhouse-server/clickhouse-library-bridge.log + trace + + diff --git a/tests/integration/test_catboost_evaluate/test.py b/tests/integration/test_catboost_evaluate/test.py index 7412d34dd40..bf4f9f85cac 100644 --- a/tests/integration/test_catboost_evaluate/test.py +++ b/tests/integration/test_catboost_evaluate/test.py @@ -12,7 +12,9 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", stay_alive=True, main_configs=["config/models_config.xml"] + "instance", + stay_alive=True, + main_configs=["config/models_config.xml", "config/logger_library_bridge.xml"], ) diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 45ee876aa1d..ec34c181371 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -127,6 +127,50 @@ def check_secrets_for_tables(test_cases, password): ) +def test_backup_table(): + password = new_password() + + setup_queries = [ + "CREATE TABLE backup_test (x int) ENGINE = MergeTree ORDER BY x", + "INSERT INTO backup_test SELECT * FROM numbers(10)", + ] + + endpoints_with_credentials = [ + ( + f"S3('http://minio1:9001/root/data/backup_test_base', 'minio', '{password}')", + f"S3('http://minio1:9001/root/data/backup_test_incremental', 'minio', '{password}')", + ) + ] + + for query in 
setup_queries: + node.query_and_get_answer_with_error(query) + + # Actually need to make two backups to have base_backup + def make_test_case(endpoint_specs): + # Run ASYNC so it returns the backup id + return ( + f"BACKUP TABLE backup_test TO {endpoint_specs[0]} ASYNC", + f"BACKUP TABLE backup_test TO {endpoint_specs[1]} SETTINGS async=1, base_backup={endpoint_specs[0]}", + ) + + test_cases = [ + make_test_case(endpoint_spec) for endpoint_spec in endpoints_with_credentials + ] + for base_query, inc_query in test_cases: + node.query_and_get_answer_with_error(base_query)[0] + + inc_backup_query_output = node.query_and_get_answer_with_error(inc_query)[0] + inc_backup_id = TSV.toMat(inc_backup_query_output)[0][0] + names_in_system_backups_output, _ = node.query_and_get_answer_with_error( + f"SELECT base_backup_name, name FROM system.backups where id = '{inc_backup_id}'" + ) + + base_backup_name, name = TSV.toMat(names_in_system_backups_output)[0] + + assert password not in base_backup_name + assert password not in name + + def test_create_table(): password = new_password() diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 286a2d29541..97c8b65f15d 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -2714,3 +2714,698 @@ def table_with_indexes(clickhouse_node, mysql_node, service_name): mysql_node.query(f"DROP DATABASE IF EXISTS {db}") clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + + +def binlog_client_test(clickhouse_node, mysql_node, replication): + db = "binlog_client_test" + replication.create_db_mysql(db) + + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 100000, column="score") + replication.create_db_ch(f"{db}1", from_mysql_db=db, settings="use_binlog_client=1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}1 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + + num_rows = replication.inserted_rows + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + replication.create_db_ch(f"{db}2", from_mysql_db=db, settings="use_binlog_client=1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}2 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + num_rows = replication.inserted_rows + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() FROM system.mysql_binlogs WHERE name = '{db}1'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() FROM system.mysql_binlogs WHERE name = '{db}2'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + # Catch up + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + 
+ replication.drop_dbs_ch() + replication.create_db_ch( + f"{db}1", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10", + ) + replication.create_db_ch( + f"{db}2", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10", + ) + replication.insert_data(db, "t", 10000, column="score") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}1 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}2 FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 10000, column="score") + + num_rows = replication.inserted_rows + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() FROM system.mysql_binlogs WHERE name = '{db}1'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT count() FROM system.mysql_binlogs WHERE name = '{db}2'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=60, + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + + replication.create_db_ch( + f"{db}3", + from_mysql_db=db, + settings="use_binlog_client=1", + ) + + mysql_node.query(f"UPDATE {db}.t SET score = score + 1") + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}1'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}2'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}3'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}3.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + column = "bit_xor(cast(crc32(concat(toString(assumeNotNull(id)), toString(assumeNotNull(score)), toString(assumeNotNull(create_time)))) AS UInt32)) AS checksum" + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}1.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}2.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + 
f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}3.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}1") + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM system.mysql_binlogs WHERE name = '{db}1'", + "0\n", + interval_seconds=1, + retry_count=10, + ) + + +def binlog_client_timeout_test(clickhouse_node, mysql_node, replication): + db = "binlog_client_timeout_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 10000, column="score") + num_rows = replication.inserted_rows + + replication.create_db_ch( + f"{db}1", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=100000000, max_milliseconds_to_wait_in_binlog_queue=60000", + ) + replication.create_db_ch( + f"{db}2", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10", + ) + replication.create_db_ch( + f"{db}3", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=100", + ) + replication.create_db_ch( + f"{db}4", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=10", + ) + + # After incremental sync + check_query( + clickhouse_node, + f"/* expect: 100000000, 60000 */ SELECT max_bytes, max_waiting_ms FROM system.mysql_binlogs WHERE name = '{db}1'", + f"100000000\t60000\n", + interval_seconds=1, + retry_count=10, + ) + check_query( + clickhouse_node, + f"/* expect: 10 */ SELECT max_bytes FROM system.mysql_binlogs WHERE name = '{db}2'", + f"10\n", + interval_seconds=2, + retry_count=10, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}3.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}4.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + clickhouse_node.query(f"DROP DATABASE {db}3") + replication.create_db_ch( + f"{db}3", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=10", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}3 FORMAT TSV", + "t\n", + ) + + clickhouse_node.query(f"DROP DATABASE {db}4") + replication.create_db_ch( + f"{db}4", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_queue=10, max_milliseconds_to_wait_in_binlog_queue=50", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}4 FORMAT TSV", + "t\n", + ) + + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 1000 AND id < 100000" + ) + mysql_node.query(f"UPDATE {db}.t SET create_time='2021-11-11' WHERE score > 1000") + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-01-01'" + ) + + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}1.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + 
clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}2.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}3.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}4.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}1'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}2'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}3'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT size FROM system.mysql_binlogs WHERE name = '{db}4'", + "0\n", + interval_seconds=1, + retry_count=300, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}1.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}2.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}3.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {num_rows} */ SELECT count() FROM {db}4.t", + f"{num_rows}\n", + interval_seconds=1, + retry_count=30, + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + column = "bit_xor(cast(crc32(concat(toString(assumeNotNull(id)), toString(assumeNotNull(score)), toString(assumeNotNull(create_time)))) AS UInt32)) AS checksum" + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}1.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}2.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}3.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}4.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + + +def wrong_password_test(clickhouse_node, mysql_node, replication): + db = "wrong_password_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 100, column="score") + with pytest.raises(Exception) as exc: + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{replication.mysql_host}:3306', '{db}', 'root', 'wrong_password') SETTINGS use_binlog_client=1" + ) + + replication.create_db_ch(db, settings="use_binlog_client=1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100, column="score") + check_query( + 
clickhouse_node, + f"/* expect: 200 */ SELECT COUNT() FROM {db}.t ", + "200\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: root@{replication.mysql_host}:3306 */ SELECT binlog_client_name FROM system.mysql_binlogs WHERE name = '{db}'", + f"root@{replication.mysql_host}:3306\n", + interval_seconds=1, + retry_count=30, + ) + + +def dispatcher_buffer_test(clickhouse_node, mysql_node, replication): + db = "dispatcher_buffer_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + replication.insert_data(db, "t", 100, column="score") + rows_count = 100 + replication.create_db_ch( + db, + settings="use_binlog_client=1, max_bytes_in_binlog_dispatcher_buffer=0, max_flush_milliseconds_in_binlog_dispatcher=0", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + rows_count += 100000 + + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 10000 AND id < 50000" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-01-01'" + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + column = "bit_xor(cast(crc32(concat(toString(assumeNotNull(id)), toString(assumeNotNull(score)), toString(assumeNotNull(create_time)))) AS UInt32)) AS checksum" + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {rows_count} */ SELECT COUNT() FROM {db}.t", + f"{rows_count}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}.t WHERE toDate(create_time)='2021-01-01'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + + clickhouse_node.query(f"DROP DATABASE {db}") + replication.create_db_ch( + f"{db}", + from_mysql_db=db, + settings="use_binlog_client=1, max_bytes_in_binlog_dispatcher_buffer=1000, max_flush_milliseconds_in_binlog_dispatcher=1000", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 10000, column="score") + rows_count += 10000 + + mysql_node.query(f"UPDATE {db}.t SET create_time='2021-11-11' WHERE score > 10000") + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 10000 AND id < 50000" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-01-01'" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-11-01'" + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {rows_count} */ SELECT COUNT() FROM {db}.t", + f"{rows_count}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}.t WHERE toDate(create_time)='2021-11-01'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + + replication.create_db_ch( + db, 
+ settings="use_binlog_client=1, max_bytes_in_binlog_dispatcher_buffer=100000000, max_flush_milliseconds_in_binlog_dispatcher=1000", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t\n", + ) + + replication.insert_data(db, "t", 100000, column="score") + rows_count += 100000 + + mysql_node.query(f"UPDATE {db}.t SET create_time='2021-11-11' WHERE score > 10000") + mysql_node.query( + f"UPDATE {db}.t SET create_time='2021-01-01' WHERE id > 10000 AND id < 50000" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-01-01'" + ) + mysql_node.query( + f"UPDATE {db}.t SET create_time=now() WHERE create_time='2021-11-01'" + ) + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM system.mysql_binlogs WHERE name = '{db}' AND (dispatcher_events_read_per_sec > 0 OR dispatcher_bytes_read_per_sec > 0 OR dispatcher_events_flush_per_sec > 0 OR dispatcher_bytes_flush_per_sec > 0)", + f"1\n", + interval_seconds=1, + retry_count=30, + ) + + mysql_crc32 = mysql_node.query_and_get_data( + f"SELECT bit_xor(cast(crc32(concat(id, score, create_time)) AS unsigned)) AS checksum FROM {db}.t" + )[0][0] + check_query( + clickhouse_node, + f"/* expect: {mysql_crc32} */ SELECT {column} FROM {db}.t", + f"{mysql_crc32}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: {rows_count} */ SELECT COUNT() FROM {db}.t", + f"{rows_count}\n", + interval_seconds=1, + retry_count=30, + ) + check_query( + clickhouse_node, + f"/* expect: 0 */ SELECT COUNT() FROM {db}.t WHERE toDate(create_time)='2021-11-01'", + "0\n", + interval_seconds=1, + retry_count=30, + ) + + +def gtid_after_attach_test(clickhouse_node, mysql_node, replication): + db = "gtid_after_attach_test" + replication.create_db_mysql(db) + mysql_node.query( + f"CREATE TABLE {db}.t(id INT PRIMARY KEY AUTO_INCREMENT, score int, create_time DATETIME DEFAULT NOW())" + ) + + db_count = 6 + for i in range(db_count): + replication.create_db_ch( + f"{db}{i}", + from_mysql_db=db, + settings="use_binlog_client=1", + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db}0 FORMAT TSV", + "t\n", + ) + for i in range(int(db_count / 2)): + clickhouse_node.query(f"DETACH DATABASE {db}{i}") + + mysql_node.query(f"USE {db}") + rows = 10000 + for i in range(100): + mysql_node.query(f"ALTER TABLE t ADD COLUMN (e{i} INT)") + replication.insert_data(db, "t", rows, column="score") + + clickhouse_node.restart_clickhouse(stop_start_wait_sec=120) + + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT(DISTINCT(dispatcher_name)) FROM system.mysql_binlogs WHERE name LIKE '{db}%'", + "1\n", + interval_seconds=1, + retry_count=300, + ) diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index f3369e25d94..727188a4b86 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -1,3 +1,4 @@ +import os import time import pymysql.cursors import pytest @@ -142,6 +143,145 @@ def clickhouse_node(): yield node_db +class ReplicationHelper: + def __init__(self, clickhouse, mysql, mysql_host=None): + self.clickhouse = clickhouse + self.mysql = mysql + self.created_mysql_dbs = [] + self.created_clickhouse_dbs = [] + self.base_mysql_settings = os.getenv("TEST_BASE_MYSQL_SETTINGS", "") + self.base_ch_settings = os.getenv("TEST_BASE_CH_SETTINGS", "") + self.mysql_host = mysql_host if mysql_host is not None 
else cluster.mysql8_host + self.created_insert_procedures = {} + self.inserted_rows_per_sp = {} + self.inserted_rows = 0 + + def create_dbs(self, db_name, ch_settings="", mysql_settings=""): + self.create_db_mysql(db_name, settings=mysql_settings) + self.create_db_ch(db_name, settings=ch_settings) + + def create_db_mysql(self, db_name, settings=""): + self.mysql.query(f"DROP DATABASE IF EXISTS {db_name}") + self.mysql.query( + f"CREATE DATABASE {db_name} {self.base_mysql_settings} {settings}" + ) + self.created_mysql_dbs.append(db_name) + + def create_db_ch( + self, db_name, from_mysql_db=None, settings="", table_overrides="" + ): + if from_mysql_db is None: + from_mysql_db = db_name + self.clickhouse.query(f"DROP DATABASE IF EXISTS {db_name}") + all_settings = "" + create_query = f"CREATE DATABASE {db_name} ENGINE = MaterializedMySQL('{self.mysql_host}:3306', '{from_mysql_db}', 'root', 'clickhouse')" + if self.base_ch_settings or settings: + separator = ", " if self.base_ch_settings and settings else "" + create_query += f" SETTINGS {self.base_ch_settings}{separator}{settings}" + if table_overrides: + create_query += f" {table_overrides}" + self.clickhouse.query(create_query) + self.created_clickhouse_dbs.append(db_name) + + def drop_dbs_mysql(self): + for db_name in self.created_mysql_dbs: + self.mysql.query(f"DROP DATABASE IF EXISTS {db_name}") + self.created_mysql_dbs = [] + self.created_insert_procedures = {} + self.inserted_rows_per_sp = {} + self.inserted_rows = 0 + + def drop_dbs_ch(self): + for db_name in self.created_clickhouse_dbs: + self.clickhouse.query(f"DROP DATABASE IF EXISTS {db_name}") + self.created_clickhouse_dbs = [] + + def drop_dbs(self): + self.drop_dbs_mysql() + self.drop_dbs_ch() + + def create_stored_procedure(self, db, table, column): + sp_id = f"{db}_{table}_{column}" + if sp_id in self.created_insert_procedures: + return sp_id + self.mysql.query(f"DROP PROCEDURE IF EXISTS {db}.insert_test_data_{sp_id}") + self.mysql.query( + f""" +CREATE PROCEDURE {db}.insert_test_data_{sp_id}(IN num_rows INT, IN existing_rows INT) +BEGIN + DECLARE i INT; + SET i = existing_rows; + SET @insert = concat("INSERT INTO {table} ({column}) VALUES "); + SET @exedata = ""; + WHILE i < (num_rows + existing_rows) DO + SET @exedata=concat(@exedata, ",(", i , ")"); + SET i = i + 1; + IF i % 1000 = 0 + THEN + SET @exedata = SUBSTRING(@exedata, 2); + SET @exesql = concat(@insert, @exedata); + PREPARE stmt FROM @exesql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + SET @exedata = ""; + END IF; + END WHILE; + IF length(@exedata) > 0 + THEN + SET @exedata = SUBSTRING(@exedata, 2); + SET @exesql = concat(@insert, @exedata); + PREPARE stmt FROM @exesql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + END IF; +END""" + ) + self.created_insert_procedures[sp_id] = True + self.inserted_rows_per_sp[sp_id] = 0 + return sp_id + + def insert_data(self, db, table, num_rows, column="id"): + """Inserts num_rows into db.table, into the column `column` (which must be INT)""" + sp_id = self.create_stored_procedure(db, table, column) + self.mysql.query( + f"CALL {db}.insert_test_data_{sp_id}({num_rows}, {self.inserted_rows_per_sp[sp_id]})" + ) + self.inserted_rows_per_sp[sp_id] += num_rows + self.inserted_rows += num_rows + + def wait_for_sync_to_catch_up( + self, database: str = "", retry_count=30, interval_seconds=1 + ): + if database == "": + database = self.created_clickhouse_dbs[-1] + mysql_gtid = self.mysql.query_and_get_data("SELECT @@GLOBAL.gtid_executed")[0][ + 0 + ] + materialized_with_ddl.check_query( + 
self.clickhouse, + f"SELECT executed_gtid_set /* expect: {mysql_gtid} */ FROM system.materialized_mysql_databases WHERE name = '{database}'", + f"{mysql_gtid}\n", + retry_count=retry_count, + interval_seconds=interval_seconds, + ) + + +@pytest.fixture(scope="function") +def replication(started_mysql_8_0, request): + try: + replication = ReplicationHelper(node_db, started_mysql_8_0) + yield replication + finally: + if hasattr(request.session, "testsfailed") and request.session.testsfailed: + logging.warning(f"tests failed - not dropping databases") + else: + # drop databases only if the test succeeds - so we can inspect the database after failed tests + try: + replication.drop_dbs() + except Exception as e: + logging.warning(f"replication.drop_dbs() failed: {e}") + + def test_materialized_database_dml_with_mysql_5_7( started_cluster, started_mysql_5_7, clickhouse_node: ClickHouseInstance ): @@ -556,3 +696,21 @@ def test_table_with_indexes(started_cluster, started_mysql_8_0, clickhouse_node) materialized_with_ddl.table_with_indexes( clickhouse_node, started_mysql_8_0, "mysql80" ) + + +def test_binlog_client(started_cluster, started_mysql_8_0, replication): + materialized_with_ddl.binlog_client_test(node_db, started_mysql_8_0, replication) + replication.drop_dbs() + materialized_with_ddl.binlog_client_timeout_test( + node_db, started_mysql_8_0, replication + ) + replication.drop_dbs() + materialized_with_ddl.wrong_password_test(node_db, started_mysql_8_0, replication) + replication.drop_dbs() + materialized_with_ddl.dispatcher_buffer_test( + node_db, started_mysql_8_0, replication + ) + replication.drop_dbs() + materialized_with_ddl.gtid_after_attach_test( + node_db, started_mysql_8_0, replication + ) diff --git a/tests/integration/test_non_default_compression/configs/enable_zstdqat_codec.xml b/tests/integration/test_non_default_compression/configs/enable_zstdqat_codec.xml new file mode 100644 index 00000000000..c686b37a537 --- /dev/null +++ b/tests/integration/test_non_default_compression/configs/enable_zstdqat_codec.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index baac2661506..c646a678512 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -87,8 +87,3 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter node.contains_in_log("Processing query on a replica using custom_key") for node in nodes ) - else: - # we first transform all replicas into shards and then append for each shard filter - assert n1.contains_in_log( - "Single shard cluster used with custom_key, transforming replicas into virtual shards" - ) diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/__init__.py b/tests/integration/test_parallel_replicas_custom_key_failover/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml new file mode 100644 index 00000000000..da4e2517a44 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml @@ -0,0 +1,26 @@ + + + + + false + + n1 + 9000 + + + n2 + 1234 + + + n3 + 9000 + + + n4 + 1234 + + + + + + diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py 
b/tests/integration/test_parallel_replicas_custom_key_failover/test.py new file mode 100644 index 00000000000..2b5aa2682d5 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -0,0 +1,128 @@ +import pytest +import uuid +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "n1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node3 = cluster.add_instance( + "n3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) + +nodes = [node1, node3] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(cluster, table_name): + node1.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + node3.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + + node1.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + ) + node3.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + ) + + # create distributed table + node1.query(f"DROP TABLE IF EXISTS {table_name}_d SYNC") + node1.query( + f""" + CREATE TABLE {table_name}_d AS {table_name} + Engine=Distributed( + {cluster}, + currentDatabase(), + {table_name}, + key + ) + """ + ) + + # populate data + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(2000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(3000, 1000)" + ) + node3.query(f"SYSTEM SYNC REPLICA {table_name}") + + +@pytest.mark.parametrize("use_hedged_requests", [1, 0]) +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize("prefer_localhost_replica", [0, 1]) +def test_parallel_replicas_custom_key_failover( + start_cluster, + use_hedged_requests, + custom_key, + filter_type, + prefer_localhost_replica, +): + cluster = "test_single_shard_multiple_replicas" + table = "test_table" + + create_tables(cluster, table) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t1000\n" + + log_comment = uuid.uuid4() + assert ( + node1.query( + f"SELECT key, count() FROM {table}_d GROUP BY key ORDER BY key", + settings={ + "log_comment": log_comment, + "prefer_localhost_replica": prefer_localhost_replica, + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "use_hedged_requests": use_hedged_requests, + # "async_socket_for_remote": 0, + # "async_query_sending_for_remote": 0, + }, + ) + == expected_result + ) + + for node in nodes: + node.query("system flush logs") + + # the subqueries should be spread over available nodes + query_id = node1.query( + f"SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '{log_comment}' AND type = 'QueryFinish' AND initial_query_id = query_id" + ) + assert query_id != "" + query_id = query_id[:-1] + + if prefer_localhost_replica == 0: + assert ( + node1.query( + f"SELECT 'subqueries', 
count() FROM clusterAllReplicas({cluster}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t3\nn3\t2\n" + ) diff --git a/tests/integration/test_remote_blobs_naming/configs/setting.xml b/tests/integration/test_remote_blobs_naming/configs/setting.xml new file mode 100644 index 00000000000..408fa36fdd3 --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/configs/setting.xml @@ -0,0 +1,11 @@ + + + + + + 1 + 1 + + + + diff --git a/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml b/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml index 31c6a3bf968..e901f0df51d 100644 --- a/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml +++ b/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml @@ -9,17 +9,17 @@ s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/old-style-prefix/with-several-section/ minio minio123 - s3_plain - http://minio1:9001/root/data/s3_pain_key_prefix - minio - minio123 - true - + s3_plain + http://minio1:9001/root/data/s3_pain_key_prefix + minio + minio123 + true + @@ -30,7 +30,6 @@ -
@@ -38,6 +37,13 @@
+        <s3_template_key>
+            <volumes>
+                <main>
+                    <disk>s3</disk>
+                </main>
+            </volumes>
+        </s3_template_key>
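+        <!-- This node's config has no template-key disk, so the policy above just routes
+             to the plain s3 disk; it exists so the backward-compatibility test can create
+             the same table with storage_policy='s3_template_key' on both nodes. -->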
diff --git a/tests/integration/test_remote_blobs_naming/configs/storage_conf_new.xml b/tests/integration/test_remote_blobs_naming/configs/storage_conf_new.xml new file mode 100644 index 00000000000..c3b515e8777 --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/configs/storage_conf_new.xml @@ -0,0 +1,61 @@ + + + + + test + + + + + + s3 + http://minio1:9001/root/old-style-prefix/with-several-section/ + minio + minio123 + + + s3_plain + http://minio1:9001/root/data/s3_pain_key_prefix + minio + minio123 + true + + + s3 + http://minio1:9001/root/ + minio + minio123 + old-style-prefix/with-several-section + [a-z]{3}-first-random-part/new-style-prefix/constant-part/[a-z]{3}/[a-z]{29} + + + + + + +
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3>
+            <s3_plain>
+                <volumes>
+                    <main>
+                        <disk>s3_plain</disk>
+                    </main>
+                </volumes>
+            </s3_plain>
+            <s3_template_key>
+                <volumes>
+                    <main>
+                        <disk>s3_template_key</disk>
+                    </main>
+                </volumes>
+            </s3_template_key>
+        </policies>
+    </storage_configuration>
+
+    <!-- a short trailing section referencing "s3" stood here; its element names did not survive extraction -->
+</clickhouse>
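<!-- The s3_template_key disk generates new object keys matching the
     [a-z]{3}-first-random-part/new-style-prefix/constant-part/[a-z]{3}/[a-z]{29} template and
     lists old-style-prefix/with-several-section as the compatibility prefix for blobs written
     the old way; the zero-copy test below expects replicas to reference a mix of both key styles. -->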
diff --git a/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py index 485bf73dad1..8c52b05dba2 100644 --- a/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py +++ b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 - +from contextlib import contextmanager +from difflib import unified_diff import logging +import re import pytest import os @@ -27,7 +29,7 @@ def cluster(): "new_node", main_configs=[ "configs/new_node.xml", - "configs/storage_conf.xml", + "configs/storage_conf_new.xml", ], user_configs=[ "configs/settings.xml", @@ -49,6 +51,7 @@ def cluster(): with_zookeeper=True, stay_alive=True, ) + logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -200,8 +203,32 @@ def test_write_new_format(cluster): assert remote == object_key -@pytest.mark.parametrize("storage_policy", ["s3", "s3_plain"]) -def test_replicated_merge_tree(cluster, storage_policy): +@contextmanager +def drop_table_scope(nodes, tables, create_statements): + try: + for node in nodes: + for statement in create_statements: + node.query(statement) + yield + finally: + for node in nodes: + for table in tables: + node.query(f"DROP TABLE IF EXISTS {table} SYNC") + + +@pytest.mark.parametrize( + "test_case", + [ + ("s3_plain", False), + ("s3", False), + ("s3", True), + ("s3_template_key", False), + ("s3_template_key", True), + ], +) +def test_replicated_merge_tree(cluster, test_case): + storage_policy, zero_copy = test_case + if storage_policy == "s3_plain": # MergeTree table doesn't work on s3_plain. Rename operation is not implemented return @@ -209,35 +236,172 @@ def test_replicated_merge_tree(cluster, storage_policy): node_old = cluster.instances["node"] node_new = cluster.instances["new_node"] + zk_table_path = f"/clickhouse/tables/test_replicated_merge_tree_{storage_policy}{'_zero_copy' if zero_copy else ''}" create_table_statement = f""" - CREATE TABLE test_replicated_merge_tree ( - id Int64, - val String - ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_replicated_merge_tree_{storage_policy}', '{{replica}}') - PARTITION BY id - ORDER BY (id, val) - SETTINGS - storage_policy='{storage_policy}' - """ + CREATE TABLE test_replicated_merge_tree ( + id Int64, + val String + ) ENGINE=ReplicatedMergeTree('{zk_table_path}', '{{replica}}') + PARTITION BY id + ORDER BY (id, val) + SETTINGS + storage_policy='{storage_policy}', + allow_remote_fs_zero_copy_replication='{1 if zero_copy else 0}' + """ - node_old.query(create_table_statement) - node_new.query(create_table_statement) + with drop_table_scope( + [node_old, node_new], ["test_replicated_merge_tree"], [create_table_statement] + ): + node_old.query("INSERT INTO test_replicated_merge_tree VALUES (0, 'a')") + node_new.query("INSERT INTO test_replicated_merge_tree VALUES (1, 'b')") - node_old.query("INSERT INTO test_replicated_merge_tree VALUES (0, 'a')") - node_new.query("INSERT INTO test_replicated_merge_tree VALUES (1, 'b')") + # node_old have to fetch metadata from node_new and vice versa + node_old.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") + node_new.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") - # node_old have to fetch metadata from node_new and vice versa - node_old.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") - node_new.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") + count_old = node_old.query( + "SELECT 
count() FROM test_replicated_merge_tree" + ).strip() + count_new = node_new.query( + "SELECT count() FROM test_replicated_merge_tree" + ).strip() - count_old = node_old.query("SELECT count() FROM test_replicated_merge_tree").strip() - count_new = node_new.query("SELECT count() FROM test_replicated_merge_tree").strip() + assert count_old == "2" + assert count_new == "2" - assert count_old == "2" - assert count_new == "2" + if not zero_copy: + return - node_old.query("DROP TABLE test_replicated_merge_tree SYNC") - node_new.query("DROP TABLE test_replicated_merge_tree SYNC") + def get_remote_pathes(node, table_name, only_remote_path=True): + uuid = node.query( + f""" + SELECT uuid + FROM system.tables + WHERE name = '{table_name}' + """ + ).strip() + assert uuid + return node.query( + f""" + SELECT {"remote_path" if only_remote_path else "*"} + FROM system.remote_data_paths + WHERE + local_path LIKE '%{uuid}%' + AND local_path NOT LIKE '%format_version.txt%' + ORDER BY ALL + """ + ).strip() + + remote_pathes_old = get_remote_pathes(node_old, "test_replicated_merge_tree") + remote_pathes_new = get_remote_pathes(node_new, "test_replicated_merge_tree") + + assert len(remote_pathes_old) > 0 + assert remote_pathes_old == remote_pathes_new, ( + str(unified_diff(remote_pathes_old, remote_pathes_new)) + + "\n\nold:\n" + + get_remote_pathes(node_old, "test_replicated_merge_tree", False) + + "\n\nnew:\n" + + get_remote_pathes(node_new, "test_replicated_merge_tree", False) + ) + + def count_lines_with(lines, pattern): + return sum([1 for x in lines if pattern in x]) + + remore_pathes_with_old_format = count_lines_with( + remote_pathes_old.split(), "old-style-prefix" + ) + remore_pathes_with_new_format = count_lines_with( + remote_pathes_old.split(), "new-style-prefix" + ) + + if storage_policy == "s3_template_key": + assert remore_pathes_with_old_format == remore_pathes_with_new_format + assert remore_pathes_with_old_format == len(remote_pathes_old.split()) / 2 + else: + assert remore_pathes_with_old_format == len(remote_pathes_old.split()) + assert remore_pathes_with_new_format == 0 + + parts = ( + node_old.query( + """ + SELECT name + FROM system.parts + WHERE + table = 'test_replicated_merge_tree' + AND active + ORDER BY ALL + """ + ) + .strip() + .split() + ) + table_shared_uuid = node_old.query( + f"SELECT value FROM system.zookeeper WHERE path='{zk_table_path}' and name='table_shared_id'" + ).strip() + + part_blobs = {} + blobs_replicas = {} + + for part in parts: + blobs = ( + node_old.query( + f""" + SELECT name + FROM system.zookeeper + WHERE path='/clickhouse/zero_copy/zero_copy_s3/{table_shared_uuid}/{part}' + ORDER BY ALL + """ + ) + .strip() + .split() + ) + + for blob in blobs: + replicas = ( + node_old.query( + f""" + SELECT name + FROM system.zookeeper + WHERE path='/clickhouse/zero_copy/zero_copy_s3/{table_shared_uuid}/{part}/{blob}' + ORDER BY ALL + """ + ) + .strip() + .split() + ) + assert blob not in blobs_replicas + blobs_replicas[blob] = replicas + + assert part not in part_blobs + part_blobs[part] = blobs + + assert len(parts) == 2, "parts: " + str(parts) + assert len(part_blobs.keys()) == len(parts), ( + "part_blobs: " + str(part_blobs) + "; parts: " + str(parts) + ) + assert len(blobs_replicas.keys()) == len(parts), ( + "blobs_replicas: " + str(blobs_replicas) + "; parts: " + str(parts) + ) + + for replicas in blobs_replicas.values(): + assert len(replicas) == 2, "blobs_replicas: " + str(blobs_replicas) + + for blob in blobs_replicas.keys(): + assert re.match( + 
"(old-style-prefix_with-several-section|[a-z]{3}-first-random-part_new-style-prefix_constant-part)_[a-z]{3}_[a-z]{29}", + blob, + ), "blobs_replicas: " + str(blobs_replicas) + + old_style_count = sum( + [1 for x in blobs_replicas.keys() if "old-style-prefix" in x] + ) + new_style_count = sum( + [1 for x in blobs_replicas.keys() if "new-style-prefix" in x] + ) + + assert (new_style_count > 0 and old_style_count == new_style_count) or ( + new_style_count == 0 and old_style_count == len(blobs_replicas) + ) def switch_config_write_full_object_key(node, enable): diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 3e3132949e7..e2257026dc7 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -859,6 +859,55 @@ def test_settings(started_cluster): conn.close() +def test_mysql_point(started_cluster): + table_name = "test_mysql_point" + node1.query(f"DROP TABLE IF EXISTS {table_name}") + + conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + drop_mysql_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + f""" + CREATE TABLE `clickhouse`.`{table_name}` ( + `id` int NOT NULL, + `point` Point NOT NULL, + PRIMARY KEY (`id`)) ENGINE=InnoDB; + """ + ) + cursor.execute( + f"INSERT INTO `clickhouse`.`{table_name}` SELECT 1, Point(15, 20)" + ) + assert 1 == cursor.execute(f"SELECT count(*) FROM `clickhouse`.`{table_name}`") + + conn.commit() + + result = node1.query( + f"DESCRIBE mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + assert result.strip() == "id\tInt32\t\t\t\t\t\npoint\tPoint" + + assert 1 == int( + node1.query( + f"SELECT count() FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + ) + assert ( + "(15,20)" + == node1.query( + f"SELECT point FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ).strip() + ) + + node1.query("DROP TABLE IF EXISTS test") + node1.query( + f"CREATE TABLE test (id Int32, point Point) Engine=MySQL('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + assert "(15,20)" == node1.query(f"SELECT point FROM test").strip() + + drop_mysql_table(conn, table_name) + conn.close() + + if __name__ == "__main__": with contextmanager(started_cluster)() as cluster: for name, instance in list(cluster.instances.items()): diff --git a/tests/integration/test_storage_s3_queue/configs/defaultS3.xml b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml deleted file mode 100644 index 7dac6d9fbb5..00000000000 --- a/tests/integration/test_storage_s3_queue/configs/defaultS3.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - http://resolver:8080 -
Authorization: Bearer TOKEN
-
-   [second deleted endpoint entry: http://resolver:8080/root-with-auth/restricteddirectory/, followed by the closing tags of defaultS3.xml; XML markup lost in extraction]
-
diff --git a/tests/integration/test_storage_s3_queue/configs/named_collections.xml b/tests/integration/test_storage_s3_queue/configs/named_collections.xml deleted file mode 100644 index 64674e2a3e3..00000000000 --- a/tests/integration/test_storage_s3_queue/configs/named_collections.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - http://minio1:9001/root/test_table - minio - minio123 - - - http://minio1:9001/root/test_parquet - minio - minio123 - - - http://minio1:9001/root/test_parquet_gz - minio - minio123 - - - http://minio1:9001/root/test_orc - minio - minio123 - - - http://minio1:9001/root/test_native - minio - minio123 - - - http://minio1:9001/root/test.arrow - minio - minio123 - - - http://minio1:9001/root/test.parquet - minio - minio123 - - - http://minio1:9001/root/test_cache4.jsonl - true - - - diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index b83c095a7a6..7d40060fec6 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1,6 +1,5 @@ import io import logging -import os import random import time @@ -9,75 +8,57 @@ from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, ClickHouseInstance import json -""" -export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-server -export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-client -export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-odbc-bridge -export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/home/sergey/vkr/ClickHouse/programs/server -""" - -MINIO_INTERNAL_PORT = 9001 AVAILABLE_MODES = ["unordered", "ordered"] -AUTH = "'minio','minio123'," -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_AUTH = ["'minio'", "'minio123'"] +NO_AUTH = ["NOSIGN"] -def prepare_s3_bucket(started_cluster): - # Allows read-write access for bucket without authorization. 
- bucket_read_write_policy = { - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:GetBucketLocation", - "Resource": "arn:aws:s3:::root", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:ListBucket", - "Resource": "arn:aws:s3:::root", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:GetObject", - "Resource": "arn:aws:s3:::root/*", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:PutObject", - "Resource": "arn:aws:s3:::root/*", - }, - { - "Sid": "", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": "s3:DeleteObject", - "Resource": "arn:aws:s3:::root/*", - }, - ], - } +def prepare_public_s3_bucket(started_cluster): + def create_bucket(client, bucket_name, policy): + if client.bucket_exists(bucket_name): + client.remove_bucket(bucket_name) + + client.make_bucket(bucket_name) + + client.set_bucket_policy(bucket_name, json.dumps(policy)) + + def get_policy_with_public_access(bucket_name): + return { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Principal": "*", + "Action": [ + "s3:GetBucketLocation", + "s3:ListBucket", + ], + "Resource": f"arn:aws:s3:::{bucket_name}", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": "*", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + ], + "Resource": f"arn:aws:s3:::{bucket_name}/*", + }, + ], + } minio_client = started_cluster.minio_client - minio_client.set_bucket_policy( - started_cluster.minio_bucket, json.dumps(bucket_read_write_policy) - ) - started_cluster.minio_restricted_bucket = "{}-with-auth".format( - started_cluster.minio_bucket + started_cluster.minio_public_bucket = f"{started_cluster.minio_bucket}-public" + create_bucket( + minio_client, + started_cluster.minio_public_bucket, + get_policy_with_public_access(started_cluster.minio_public_bucket), ) - if minio_client.bucket_exists(started_cluster.minio_restricted_bucket): - minio_client.remove_bucket(started_cluster.minio_restricted_bucket) - - minio_client.make_bucket(started_cluster.minio_restricted_bucket) @pytest.fixture(autouse=True) @@ -89,11 +70,9 @@ def s3_queue_setup_teardown(started_cluster): instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") minio = started_cluster.minio_client - objects = list( - minio.list_objects(started_cluster.minio_restricted_bucket, recursive=True) - ) + objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) for obj in objects: - minio.remove_object(started_cluster.minio_restricted_bucket, obj.object_name) + minio.remove_object(started_cluster.minio_bucket, obj.object_name) yield # run test @@ -107,8 +86,6 @@ def started_cluster(): with_minio=True, with_zookeeper=True, main_configs=[ - "configs/defaultS3.xml", - "configs/named_collections.xml", "configs/zookeeper.xml", "configs/s3queue_log.xml", ], @@ -119,8 +96,6 @@ def started_cluster(): with_minio=True, with_zookeeper=True, main_configs=[ - "configs/defaultS3.xml", - "configs/named_collections.xml", "configs/s3queue_log.xml", ], ) @@ -129,7 +104,6 @@ def started_cluster(): cluster.start() logging.info("Cluster started") - prepare_s3_bucket(cluster) yield cluster finally: cluster.shutdown() @@ -146,7 +120,13 @@ def run_query(instance, query, stdin=None, settings=None): def generate_random_files( - started_cluster, files_path, count, column_num=3, row_num=10, start_ind=0 + started_cluster, + 
files_path, + count, + column_num=3, + row_num=10, + start_ind=0, + bucket=None, ): files = [ (f"{files_path}/test_{i}.csv", i) for i in range(start_ind, start_ind + count) @@ -164,28 +144,14 @@ def generate_random_files( values_csv = ( "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" ).encode() - put_s3_file_content(started_cluster, filename, values_csv) + put_s3_file_content(started_cluster, filename, values_csv, bucket) return total_values -def put_s3_file_content(started_cluster, filename, data): +def put_s3_file_content(started_cluster, filename, data, bucket=None): + bucket = started_cluster.minio_bucket if bucket is None else bucket buf = io.BytesIO(data) - started_cluster.minio_client.put_object( - started_cluster.minio_bucket, filename, buf, len(data) - ) - - -def get_s3_file_content(started_cluster, bucket, filename, decode=True): - # type: (ClickHouseCluster, str, str, bool) -> str - # Returns content of given S3 file as string. - - data = started_cluster.minio_client.get_object(bucket, filename) - data_str = b"" - for chunk in data.stream(): - data_str += chunk - if decode: - return data_str.decode() - return data_str + started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) def create_table( @@ -197,7 +163,12 @@ def create_table( format="column1 UInt32, column2 UInt32, column3 UInt32", additional_settings={}, file_format="CSV", + auth=DEFAULT_AUTH, + bucket=None, ): + auth_params = ",".join(auth) + bucket = started_cluster.minio_bucket if bucket is None else bucket + settings = { "s3queue_loading_retries": 0, "after_processing": "keep", @@ -206,11 +177,11 @@ def create_table( } settings.update(additional_settings) - url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{started_cluster.minio_bucket}/{files_path}/" + url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" node.query(f"DROP TABLE IF EXISTS {table_name}") create_query = f""" CREATE TABLE {table_name} ({format}) - ENGINE = S3Queue('{url}', {AUTH}'{file_format}') + ENGINE = S3Queue('{url}', {auth_params}, {file_format}) SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))} """ node.query(create_query) @@ -922,3 +893,70 @@ def test_drop_table(started_cluster): ) or node.contains_in_log( f"StorageS3Queue ({table_name}): Shutdown was called, stopping sync" ) + + +def test_s3_client_reused(started_cluster): + node = started_cluster.instances["instance"] + table_name = f"test.test_s3_client_reused" + dst_table_name = f"{table_name}_dst" + files_path = f"{table_name}_data" + row_num = 10 + + def get_created_s3_clients_count(): + value = node.query( + f"SELECT value FROM system.events WHERE event='S3Clients'" + ).strip() + return int(value) if value != "" else 0 + + def wait_all_processed(files_num): + expected_count = files_num * row_num + for _ in range(100): + count = int(node.query(f"SELECT count() FROM {dst_table_name}")) + print(f"{count}/{expected_count}") + if count == expected_count: + break + time.sleep(1) + assert ( + int(node.query(f"SELECT count() FROM {dst_table_name}")) == expected_count + ) + + prepare_public_s3_bucket(started_cluster) + + s3_clients_before = get_created_s3_clients_count() + + create_table( + started_cluster, + node, + table_name, + "ordered", + files_path, + additional_settings={ + "after_processing": "delete", + "s3queue_processing_threads_num": 1, + }, + auth=NO_AUTH, + bucket=started_cluster.minio_public_bucket, + ) + + s3_clients_after = get_created_s3_clients_count() + assert 
s3_clients_before + 1 == s3_clients_after + + create_mv(node, table_name, dst_table_name) + + for i in range(0, 10): + s3_clients_before = get_created_s3_clients_count() + + generate_random_files( + started_cluster, + files_path, + count=1, + start_ind=i, + row_num=row_num, + bucket=started_cluster.minio_public_bucket, + ) + + wait_all_processed(i + 1) + + s3_clients_after = get_created_s3_clients_count() + + assert s3_clients_before == s3_clients_after diff --git a/tests/performance/array_element.xml b/tests/performance/array_element.xml index 1f82b833380..c3641f426f3 100644 --- a/tests/performance/array_element.xml +++ b/tests/performance/array_element.xml @@ -2,4 +2,20 @@ SELECT count() FROM numbers(100000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], []][number % 2 + 2]) + + + select materialize(array(array(1,2,3,4)))[1] from numbers(10000000) format Null + select materialize(array(array(1,2,3,4)))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(array('hello', 'world')))[1] from numbers(10000000) format Null + select materialize(array(array('hello', 'world')))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(map('hello', 1, 'world', 2)))[1] from numbers(10000000) format Null + select materialize(array(map('hello', 1, 'world', 2)))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(1.23::Decimal256(2), 4.56::Decimal256(2)))[1] from numbers(10000000) format Null + select materialize(array(1.23::Decimal256(2), 4.56::Decimal256(2)))[materialize(1)] from numbers(10000000) format Null diff --git a/tests/performance/multiif.xml b/tests/performance/multiif.xml new file mode 100644 index 00000000000..ad56ab3f5f2 --- /dev/null +++ b/tests/performance/multiif.xml @@ -0,0 +1,8 @@ + + CREATE TABLE test_multiif_t(d Nullable(Int64)) ENGINE Memory + INSERT INTO test_multiif_t SELECT * from numbers(300000000) + + select count(1) from test_multiif_t where multiIf(d > 2, d-2, d > 1, d-1, d >0, d, 0) > 1 SETTINGS max_threads=1 + + DROP TABLE IF EXISTS test_multiif_t + diff --git a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 6d1a271355e..7dc522dca7a 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -1223,7 +1223,7 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') select test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name, 'slower' test_status, - test_desc_.2 test_duration_ms, + test_desc_.2*1e3 test_duration_ms, 'https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.' || test || '.' || toString(query_index) report_url from queries array join map('old', left, 'new', right) as test_desc_ @@ -1232,7 +1232,7 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') select test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name, 'unstable' test_status, - test_desc_.2 test_duration_ms, + test_desc_.2*1e3 test_duration_ms, 'https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.' || test || '.' 
|| toString(query_index) report_url from queries array join map('old', left, 'new', right) as test_desc_ diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference new file mode 100644 index 00000000000..31a4360469f --- /dev/null +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference @@ -0,0 +1,6 @@ +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(UInt16, UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) +2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) +3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) +2 +10001 diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.sql b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.sql new file mode 100644 index 00000000000..92748efd2d1 --- /dev/null +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.sql @@ -0,0 +1,50 @@ +--Tags: no-fasttest, no-cpu-aarch64, no-cpu-s390x +-- no-fasttest because ZSTD_QAT isn't available in fasttest +-- no-cpu-aarch64 and no-cpu-s390x because ZSTD_QAT is x86-only + +SET enable_zstd_qat_codec = 1; + +-- Suppress test failures because stderr contains warning "Initialization of hardware-assisted ZSTD_QAT codec failed, falling back to software ZSTD coded." 
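+-- (the warning appears on hosts without QAT hardware, where the codec transparently falls back
+-- to software ZSTD, so compression and decompression still work and the results below are unaffected)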
+SET send_logs_level = 'fatal'; + +DROP TABLE IF EXISTS compression_codec; + +-- negative test +CREATE TABLE compression_codec(id UInt64 CODEC(ZSTD_QAT(0))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_CODEC_PARAMETER } +CREATE TABLE compression_codec(id UInt64 CODEC(ZSTD_QAT(13))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_CODEC_PARAMETER } + +CREATE TABLE compression_codec( + id UInt64 CODEC(ZSTD_QAT), + data String CODEC(ZSTD_QAT), + ddd Date CODEC(ZSTD_QAT), + ddd32 Date32 CODEC(ZSTD_QAT), + somenum Float64 CODEC(ZSTD_QAT), + somestr FixedString(3) CODEC(ZSTD_QAT), + othernum Int64 CODEC(ZSTD_QAT), + somearray Array(UInt8) CODEC(ZSTD_QAT), + somemap Map(String, UInt32) CODEC(ZSTD_QAT), + sometuple Tuple(UInt16, UInt64) CODEC(ZSTD_QAT), +) ENGINE = MergeTree() ORDER BY tuple(); + +SHOW CREATE TABLE compression_codec; + +INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5, [1,2,3], map('k1',1,'k2',2), tuple(1,2)); +INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6, [4,5,6], map('k3',3,'k4',4), tuple(3,4)); +INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6)); + +SELECT * FROM compression_codec ORDER BY id; + +OPTIMIZE TABLE compression_codec FINAL; + +INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), toDate32('2018-12-13'), 4.4, 'ddd', 8, [10,11,12], map('k7',7,'k8',8), tuple(7,8)); + +DETACH TABLE compression_codec; +ATTACH TABLE compression_codec; + +SELECT count(*) FROM compression_codec WHERE id = 2 GROUP BY id; + +INSERT INTO compression_codec SELECT 3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6) FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM compression_codec WHERE id = 3 GROUP BY id; + +DROP TABLE IF EXISTS compression_codec; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 1a3a271528c..f9f5c2bd3df 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -150,6 +150,7 @@ SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER',' SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM SYSTEM LISTEN ['SYSTEM START LISTEN','SYSTEM STOP LISTEN'] GLOBAL SYSTEM +SYSTEM JEMALLOC ['SYSTEM JEMALLOC PURGE','SYSTEM JEMALLOC ENABLE PROFILE','SYSTEM JEMALLOC DISABLE PROFILE','SYSTEM JEMALLOC FLUSH PROFILE'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL displaySecretsInShowAndSelect [] GLOBAL ALL diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.reference b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference index 64bb6b746dc..209e3ef4b62 100644 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.reference +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference @@ -1 +1 @@ -30 +20 diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh index f07ffc02e4f..9d9c6b920b6 100755 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh @@ -5,4 +5,4 @@ 
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable|Timeout: connect timed out' | wc -l +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --connect_timeout_with_failover_ms 1 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable|Timeout: connect timed out' | wc -l diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql index 6e23ab9cdb9..c25f308eda8 100644 --- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql +++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + -- Check remerge_sort_lowered_memory_bytes_ratio setting set max_memory_usage='300Mi'; diff --git a/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.reference b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.reference new file mode 100644 index 00000000000..ee824fc9e0a --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.reference @@ -0,0 +1,3 @@ +81920 +81920 +81920 diff --git a/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.sql b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.sql new file mode 100644 index 00000000000..1286b0e74eb --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_ast_rewrite_settings.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS aggregate_functions_null_for_empty; + +CREATE TABLE aggregate_functions_null_for_empty (`x` UInt32, `y` UInt64, PROJECTION p (SELECT sum(y))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO aggregate_functions_null_for_empty SELECT number, number * 2 FROM numbers(8192 * 10) SETTINGS aggregate_functions_null_for_empty = true; + +SELECT count() FROM aggregate_functions_null_for_empty; + +DROP TABLE aggregate_functions_null_for_empty; + +DROP TABLE IF EXISTS transform_null_in; + +CREATE TABLE transform_null_in (`x` UInt32, `y` UInt64, PROJECTION p (SELECT sum(y in (1,2,3)))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO transform_null_in SELECT number, number * 2 FROM numbers(8192 * 10) SETTINGS transform_null_in = true; + +SELECT count() FROM transform_null_in; + +DROP TABLE transform_null_in; + +DROP TABLE IF EXISTS legacy_column_name_of_tuple_literal; + +CREATE TABLE legacy_column_name_of_tuple_literal (`x` UInt32, `y` UInt64, PROJECTION p (SELECT sum(y in (1,2,3)))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO legacy_column_name_of_tuple_literal SELECT number, number * 2 FROM numbers(8192 * 10) SETTINGS legacy_column_name_of_tuple_literal = true; + +SELECT count() FROM legacy_column_name_of_tuple_literal; + +DROP TABLE legacy_column_name_of_tuple_literal; diff --git a/tests/queries/0_stateless/01753_fix_clickhouse_format.reference b/tests/queries/0_stateless/01753_fix_clickhouse_format.reference index 0aad4d64c55..735d4099534 100644 --- a/tests/queries/0_stateless/01753_fix_clickhouse_format.reference +++ b/tests/queries/0_stateless/01753_fix_clickhouse_format.reference @@ -1,5 +1,4 @@ -SELECT 1 -; +SELECT 1; SELECT 1 UNION ALL @@ -10,8 +9,7 
@@ UNION ALL ) ; -SELECT 1 -; +SELECT 1; SELECT 1 UNION ALL @@ -22,4 +20,6 @@ UNION ALL ) ; +INSERT INTO t VALUES (1); + OK diff --git a/tests/queries/0_stateless/01753_fix_clickhouse_format.sh b/tests/queries/0_stateless/01753_fix_clickhouse_format.sh index 5cdd53b2166..ba7fe949833 100755 --- a/tests/queries/0_stateless/01753_fix_clickhouse_format.sh +++ b/tests/queries/0_stateless/01753_fix_clickhouse_format.sh @@ -8,4 +8,6 @@ echo "select 1; select 1 union all (select 1 union distinct select 1); " | $CL echo "select 1; select 1 union all (select 1 union distinct select 1); -- comment " | $CLICKHOUSE_FORMAT -n; -echo "insert into t values (1); " | $CLICKHOUSE_FORMAT -n 2>&1 \ | grep -F -q "Code: 578" && echo 'OK' || echo 'FAIL' +echo "insert into t values (1); " | $CLICKHOUSE_FORMAT -n + +echo 'insert into t format JSONEachRow {"a":1};' | $CLICKHOUSE_FORMAT -n 2>&1 \ | grep -F -q "NOT_IMPLEMENTED" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02245_make_datetime64.reference b/tests/queries/0_stateless/02245_make_datetime64.reference index 0ac672ae54d..1c7d31788e3 100644 --- a/tests/queries/0_stateless/02245_make_datetime64.reference +++ b/tests/queries/0_stateless/02245_make_datetime64.reference @@ -67,3 +67,4 @@ DateTime64(7, \'UTC\') 1900-01-01 00:00:00.000 1900-01-01 00:00:00.000 1900-01-01 00:00:00.000 +2024-01-08 11:12:13.014 diff --git a/tests/queries/0_stateless/02245_make_datetime64.sql b/tests/queries/0_stateless/02245_make_datetime64.sql index 62784cb9b75..71629ad8dff 100644 --- a/tests/queries/0_stateless/02245_make_datetime64.sql +++ b/tests/queries/0_stateless/02245_make_datetime64.sql @@ -82,6 +82,9 @@ select makeDateTime64(1991, 8, 24, 65537, 4, 0); select makeDateTime64(1991, 8, 24, 21, 65537, 0); select makeDateTime64(1991, 8, 24, 21, 4, 65537); +-- bug 58590 +select makeDateTime64(2024, 1, 8, 11, 12, 13, materialize(14)); + select makeDateTime64(year, 1, 1, 1, 0, 0, 0, precision, timezone) from ( select 1984 as year, 5 as precision, 'UTC' as timezone union all diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index e2d1ec3980e..2bba75f6788 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -1,7 +1,7 @@ [multi] insert into foo settings max_threads=1 Syntax error (query): failed at position 40 (end of query): [multi] insert into foo format tsv settings max_threads=1 -Can't format ASTInsertQuery with data, since data will be lost. +NOT_IMPLEMENTED [multi] insert into foo format tsv settings max_threads=1 INSERT INTO foo SETTINGS max_threads = 1 diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 8b156ffec83..49aa56d6c0a 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -25,7 +25,7 @@ function run_format_both() run_format 'insert into foo settings max_threads=1' |& grep --max-count 2 --only-matching -e "Syntax error (query): failed at position .* (end of query):" -e '^\[.*$' # compatibility -run_format 'insert into foo format tsv settings max_threads=1' |& grep --max-count 2 --only-matching -e "Can't format ASTInsertQuery with data, since data will be lost." 
-e '^\[.*$' +run_format 'insert into foo format tsv settings max_threads=1' |& grep --max-count 2 --only-matching -e "NOT_IMPLEMENTED" -e '^\[.*$' run_format_both 'insert into foo format tsv settings max_threads=1' --allow_settings_after_format_in_insert run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert |& grep --max-count 2 --only-matching -e "You have SETTINGS before and after FORMAT" -e '^\[.*$' diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh index 8f08bd6f84b..a08928a773c 100755 --- a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CURL_OUTPUT=$(echo 'SELECT 1 + sleepEachRow(0.00002) FROM numbers(100000)' | \ - ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) + ${CLICKHOUSE_CURL_COMMAND} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) READ_ROWS=$(echo "${CURL_OUTPUT}" | \ grep 'X-ClickHouse-Summary' | \ diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql index 21a1b643d98..bb12d1f251f 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -1,23 +1,23 @@ -- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ --- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, --- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, --- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, --- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, --- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, --- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, --- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, --- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, --- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, --- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, --- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, +-- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, +-- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, +-- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, +-- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, +-- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, -- dynamic(null), 'why', 'Professional', 'Partial College', 38 -- ] DROP TABLE IF EXISTS Customers; CREATE TABLE Customers -( +( FirstName Nullable(String), - LastName String, + LastName String, Occupation String, Education String, Age Nullable(UInt8) @@ -89,9 +89,9 @@ print '-- Summarize following sort --'; Customers | sort by FirstName | summarize count() by Occupation | sort by Occupation; print '-- summarize with bin --'; -EventLog | 
summarize count=count() by bin(Created, 1000); -EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s); -EventLog | summarize count=count() by time_label=bin(Created/1000, 1s); +EventLog | summarize count=count() by bin(Created, 1000) | sort by count asc; +EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s) | sort by count asc; +EventLog | summarize count=count() by time_label=bin(Created/1000, 1s) | sort by count asc; Dates | project bin(datetime(EventTime), 1m); print '-- make_list_with_nulls --'; Customers | summarize t = make_list_with_nulls(FirstName); diff --git a/tests/queries/0_stateless/02373_progress_contain_result.sh b/tests/queries/0_stateless/02373_progress_contain_result.sh index c87a5ec7615..fd343df1013 100755 --- a/tests/queries/0_stateless/02373_progress_contain_result.sh +++ b/tests/queries/0_stateless/02373_progress_contain_result.sh @@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh echo 'SELECT 1 FROM numbers(100)' | - ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0" --data-binary @- 2>&1 | + ${CLICKHOUSE_CURL_COMMAND} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0" --data-binary @- 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"elapsed_ns[^}]*//' diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference index 676fb441f53..e08bc754a71 100644 --- a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference +++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference @@ -1,11 +1,3 @@ -storage String -source String -format String -additional_format_info String -registration_time DateTime -schema Nullable(String) -number_of_rows Nullable(UInt64) -schema_inference_mode Nullable(String) x Nullable(Int64) s Nullable(String) x Nullable(Int64) diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql index 310e22ed31f..64b6cd86fc7 100644 --- a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql +++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql @@ -4,7 +4,6 @@ set input_format_json_try_infer_numbers_from_strings=1; insert into function file('02374_data1.jsonl') select number as x, 'str' as s from numbers(10); insert into function file('02374_data2.jsonl') select number as x, 'str' as s from numbers(10); -desc system.schema_inference_cache; system drop schema cache for file; desc file('02374_data1.jsonl'); diff --git a/tests/queries/0_stateless/02494_query_cache_user_isolation.reference b/tests/queries/0_stateless/02494_query_cache_user_isolation.reference new file mode 100644 index 00000000000..f8c4b31b22a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_user_isolation.reference @@ -0,0 +1,28 @@ +Attack 1 +0 +system.query_cache with old user 1 +0 +0 1 +1 0 +system.query_cache with new user 0 +0 +0 1 +1 0 +0 1 +Attack 2 +-- policy_1 test +1 1 +3 1 +6 1 +-- policy_2 test +2 2 +5 2 +8 2 +-- policy_1 with query cache test +1 1 +3 1 +6 1 +-- policy_2 with query cache test +2 2 +5 2 +8 2 diff --git a/tests/queries/0_stateless/02494_query_cache_user_isolation.sh b/tests/queries/0_stateless/02494_query_cache_user_isolation.sh new file mode 100755 index 00000000000..d55e2460619 --- /dev/null +++ 
b/tests/queries/0_stateless/02494_query_cache_user_isolation.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest, long +# Tag no-parallel: Messes with internal cache +# no-fasttest: Produces wrong results in fasttest, unclear why, didn't reproduce locally. +# long: Sloooow ... + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# -- Attack 1: +# - create a user, +# - run a query whose result is stored in the query cache, +# - drop the user, recreate it with the same name +# - test that the cache entry is inaccessible + +echo "Attack 1" + +rnd=`tr -dc 1-9 (exponent, in) +-- INSERT INTO t_leading_zeroes_f VALUES (100, '00009e00009', 00009e00009, 9e9), (101, '-00009e00009', -00009e00009, -9e9), (102, '+00009e00009', +00009e00009, 9e9) + +SELECT 'Leading zeroes into Float32'; +SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes_f t ORDER BY id; + +DROP TABLE IF EXISTS t_leading_zeroes; +DROP TABLE IF EXISTS t_leading_zeroes_f; \ No newline at end of file diff --git a/tests/queries/0_stateless/02918_multif_for_nullable.reference b/tests/queries/0_stateless/02918_multif_for_nullable.reference new file mode 100644 index 00000000000..f58086cfee1 --- /dev/null +++ b/tests/queries/0_stateless/02918_multif_for_nullable.reference @@ -0,0 +1,5 @@ +-1 -1 -1 +1 -1 -1 +1 1 -1 +1 2 \N +1 3 \N diff --git a/tests/queries/0_stateless/02918_multif_for_nullable.sh b/tests/queries/0_stateless/02918_multif_for_nullable.sh new file mode 100755 index 00000000000..cd9ac8b904f --- /dev/null +++ b/tests/queries/0_stateless/02918_multif_for_nullable.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (d Nullable(Int64)) engine=Memory" +$CLICKHOUSE_CLIENT -q "insert into test_tbl select * from numbers(5)" +$CLICKHOUSE_CLIENT -q "select multiIf(d > 0, 1, -1), multiIf(d > 1, d-1, -1), multiIf(d > 2, null, -1) from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference new file mode 100644 index 00000000000..2d97dd0e12e --- /dev/null +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference @@ -0,0 +1,29 @@ +-- { echoOn } +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +0 250 +1 250 +2 250 +3 250 +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='range'; +0 250 +1 250 +2 250 +3 250 +SET use_hedged_requests=0; +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +0 250 +1 250 +2 250 +3 250 diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql new file mode 100644 index 00000000000..b9bc6974c47 --- /dev/null +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS 02918_parallel_replicas; + +CREATE TABLE 02918_parallel_replicas (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x); + +INSERT INTO 02918_parallel_replicas SELECT toString(number), number % 4 FROM numbers(1000); + +SET prefer_localhost_replica=0; + +-- { echoOn } +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; + +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='range'; + +SET use_hedged_requests=0; +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +-- { echoOff } + +DROP TABLE 02918_parallel_replicas; diff --git a/tests/queries/0_stateless/02932_idna.reference b/tests/queries/0_stateless/02932_idna.reference new file mode 100644 index 00000000000..0947194c07f --- /dev/null +++ 
b/tests/queries/0_stateless/02932_idna.reference @@ -0,0 +1,88 @@ +-- Negative tests +-- Regular cases +straße.de xn--strae-oqa.de xn--strae-oqa.de straße.de straße.de +2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 2001:4860:4860::8888 +AMAZON amazon amazon amazon amazon +aa-- aa-- aa-- aa-- aa-- +a†-- xn--a---kp0a xn--a---kp0a a†-- a†-- +ab--c ab--c ab--c ab--c ab--c +-† xn----xhn xn----xhn -† -† +-x.xn--zca -x.xn--zca -x.xn--zca -x.ß -x.ß +x-.xn--zca x-.xn--zca x-.xn--zca x-.ß x-.ß +x-.ß x-.xn--zca x-.xn--zca x-.ß x-.ß +x..ß x..xn--zca x..xn--zca x..ß x..ß +128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1 128.0,0.1 +xn--zca.xn--zca xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß +xn--zca.ß xn--zca.xn--zca xn--zca.xn--zca ß.ß ß.ß +x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x x01234567890123456789012345678901234567890123456789012345678901x +x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß +x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.xn--zca x01234567890123456789012345678901234567890123456789012345678901x.ß x01234567890123456789012345678901234567890123456789012345678901x.ß +01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x 01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x +≠ xn--1ch xn--1ch ≠ ≠ +aa-- aa-- aa-- aa-- +ab--c ab--c ab--c ab--c +-x -x -x -x + +xn--1ch ≠ xn--1ch xn--1ch +xn--dqd20apc ᄎᆞᆷ xn--dqd20apc xn--dqd20apc +xn--gdh ≮ xn--gdh xn--gdh +xn--80aaa0ahbbeh4c йайзаакпий xn--80aaa0ahbbeh4c xn--80aaa0ahbbeh4c +xn--3bs854c 团淄 xn--3bs854c xn--3bs854c +xn--mgb9awbf عمان xn--mgb9awbf xn--mgb9awbf +xn--mgbaam7a8h امارات xn--mgbaam7a8h xn--mgbaam7a8h +xn--mgbbh1a71e بھارت xn--mgbbh1a71e xn--mgbbh1a71e +xn--s7y.com 短.com xn--s7y.com xn--s7y.com 
+xn--55qx5d.xn--tckwe 公司.コム xn--55qx5d.xn--tckwe xn--55qx5d.xn--tckwe +xn--4dbrk0ce ישראל xn--4dbrk0ce xn--4dbrk0ce +xn--zckzah テスト xn--zckzah xn--zckzah +xn--p1ai.com рф.com xn--p1ai.com xn--p1ai.com +xn--mxahbxey0c.gr εχαμπλε.gr xn--mxahbxey0c.gr xn--mxahbxey0c.gr +xn--h2brj9c भारत xn--h2brj9c xn--h2brj9c +xn--d1acpjx3f.xn--p1ai яндекс.рф xn--d1acpjx3f.xn--p1ai xn--d1acpjx3f.xn--p1ai +xn--q9jyb4c みんな xn--q9jyb4c xn--q9jyb4c +xn--sterreich-z7a.at österreich.at xn--sterreich-z7a.at xn--sterreich-z7a.at +xn--h2breg3eve.xn--h2brj9c भारतम्.भारत xn--h2breg3eve.xn--h2brj9c xn--h2breg3eve.xn--h2brj9c +ejemplo.xn--q9jyb4c ejemplo.みんな ejemplo.xn--q9jyb4c ejemplo.xn--q9jyb4c +xn--9t4b11yi5a.com 테스트.com xn--9t4b11yi5a.com xn--9t4b11yi5a.com +xn--gk3at1e.com 通販.com xn--gk3at1e.com xn--gk3at1e.com +xn--42c2d9a คอม xn--42c2d9a xn--42c2d9a +1xn-- 1xn-- 1xn-- 1xn-- +xn--bih.com ⌘.com xn--bih.com xn--bih.com +xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c موقع.وزارة-الأتصالات.مصر xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c +xn--mgbb9fbpob موبايلي xn--mgbb9fbpob xn--mgbb9fbpob +xn--55qw42g.xn--55qw42g 公益.公益 xn--55qw42g.xn--55qw42g xn--55qw42g.xn--55qw42g +≠ ≠ xn--1ch xn--1ch +ファッション.biz ファッション.biz xn--bck1b9a5dre4c.biz xn--bck1b9a5dre4c.biz +-- Special cases +---- Empty input + + + +---- NULL input +\N +\N +\N +---- Garbage inputs for idnaEncode + + + + +---- Long input +Row 1: +────── +idna: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +ascii: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... 
vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk. +ascii_try: wenn sie ...xn-- vom hauptbahnhof in mnchen -n7c...xn-- mit zehn minuten, ohne, dass sie am flughafen noch einchecken mssen, dann starten sie im grunde genommen am flughafen -8gm... am ...xn-- am hauptbahnhof in mnchen starten sie ihren flug-0cf. zehn minuten.xn-- schauen sie sich mal die groen flughfen an, wenn sie in heathrow in london oder sonst wo, meine se -83h23c...xn-- charles de gaulle h in frankreich oder in -jvd...xn--h-zfa... in ... in...xn--h-zfa...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ...xn-- an den flughafen franz josef strau-z2c.xn-- dann starten sie praktisch hier am hauptbahnhof in mnchen-t9f.xn-- das bedeutet natrlich, dass der hauptbahnhof im grunde genommen nher an bayern -lxg23q...xn-- an die bayerischen stdte heranwchst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen-1hkk. +original: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen. +original_try: wenn sie ... vom hauptbahnhof in münchen ... mit zehn minuten, ohne, dass sie am flughafen noch einchecken müssen, dann starten sie im grunde genommen am flughafen ... am ... am hauptbahnhof in münchen starten sie ihren flug. zehn minuten. schauen sie sich mal die großen flughäfen an, wenn sie in heathrow in london oder sonst wo, meine se ... charles de gaulle äh in frankreich oder in ...äh... in ... in...äh...in rom. wenn sie sich mal die entfernungen ansehen, wenn sie frankfurt sich ansehen, dann werden sie feststellen, dass zehn minuten... sie jederzeit locker in frankfurt brauchen, um ihr gate zu finden. wenn sie vom flug ... vom ... vom hauptbahnhof starten - sie steigen in den hauptbahnhof ein, sie fahren mit dem transrapid in zehn minuten an den flughafen in ... 
an den flughafen franz josef strauß. dann starten sie praktisch hier am hauptbahnhof in münchen. das bedeutet natürlich, dass der hauptbahnhof im grunde genommen näher an bayern ... an die bayerischen städte heranwächst, weil das ja klar ist, weil auf dem hauptbahnhof viele linien aus bayern zusammenlaufen. +---- Non-const input + +münchen xn--mnchen-3ya xn--mnchen-3ya münchen münchen +straße.münchen.de xn--strae-oqa.xn--mnchen-3ya.de xn--strae-oqa.xn--mnchen-3ya.de straße.münchen.de straße.münchen.de +---- Non-const input with invalid values sprinkled in +london.co.uk london.co.uk london.co.uk +microsoft.com microsoft.com microsoft.com +xn-- +xn-- +xn--tešla +ytraße.münchen.de xn--ytrae-oqa.xn--mnchen-3ya.de ytraße.münchen.de diff --git a/tests/queries/0_stateless/02932_idna.sql b/tests/queries/0_stateless/02932_idna.sql new file mode 100644 index 00000000000..db7688064f2 --- /dev/null +++ b/tests/queries/0_stateless/02932_idna.sql @@ -0,0 +1,124 @@ +-- Tags: no-fasttest +-- no-fasttest: requires idna library + +-- See also 02932_punycode.sql + +SELECT '-- Negative tests'; + +SELECT idnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryIdnaEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT idnaDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT idnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tryIdnaEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT idnaDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT idnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryIdnaEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT idnaDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT idnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT tryIdnaEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT idnaDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } + +SELECT '-- Regular cases'; + +-- The test cases originate from the ada idna unit tests: +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_alternating.txt +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_unicode_alternating.txt +-- +SELECT 'straße.de' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '2001:4860:4860::8888' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'AMAZON' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'aa--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'a†--' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'ab--c' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '-†' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '-x.xn--zca' AS idna, idnaEncode(idna) AS ascii, 
tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x-.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x-.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x..ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '128.0,0.1' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'xn--zca.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'xn--zca.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x01234567890123456789012345678901234567890123456789012345678901x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.xn--zca' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT 'x01234567890123456789012345678901234567890123456789012345678901x.ß' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; +SELECT '≠' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(idna) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try; + +SELECT 'aa--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'ab--c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '-x' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--1ch' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--dqd20apc' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--gdh' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--80aaa0ahbbeh4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--3bs854c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgb9awbf' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, 
tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgbaam7a8h' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgbbh1a71e' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--s7y.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--55qx5d.xn--tckwe' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--4dbrk0ce' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--zckzah' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--p1ai.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mxahbxey0c.gr' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--d1acpjx3f.xn--p1ai' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--sterreich-z7a.at' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--h2breg3eve.xn--h2brj9c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'ejemplo.xn--q9jyb4c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--9t4b11yi5a.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--gk3at1e.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--42c2d9a' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '1xn--' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--bih.com' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--mgbb9fbpob' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'xn--55qw42g.xn--55qw42g' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT '≠' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +SELECT 'ファッション.biz' AS ascii, idnaDecode(ascii) AS unicode, idnaEncode(unicode) AS original, tryIdnaEncode(unicode) AS original_try; +-- +SELECT '-- Special cases'; + +SELECT '---- Empty input'; +SELECT idnaEncode(''); +SELECT tryIdnaEncode(''); +SELECT idnaDecode(''); + +SELECT '---- NULL 
input'; +SELECT idnaEncode(NULL); +SELECT tryIdnaEncode(NULL); +SELECT idnaDecode(NULL); + +SELECT '---- Garbage inputs for idnaEncode'; +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/to_ascii_invalid.txt +SELECT idnaEncode('xn--'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('xn--'); +SELECT idnaEncode('ﻱa'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('ﻱa'); +SELECT idnaEncode('xn--a-yoc'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('xn--a-yoc'); +SELECT idnaEncode('xn--tešla'); -- { serverError BAD_ARGUMENTS } +SELECT tryIdnaEncode('xn--tešla'); + +SELECT '---- Long input'; +SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FORMAT Vertical; + +SELECT '---- Non-const input'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna; +INSERT INTO tab VALUES ('straße.münchen.de') ('') ('münchen'); +SELECT idna, idnaEncode(idna) AS ascii, tryIdnaEncode(ascii) AS ascii_try, idnaDecode(ascii) AS original, idnaDecode(ascii_try) AS original_try FROM tab; +DROP TABLE tab; + +SELECT '---- Non-const input with invalid values sprinkled in'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (idna String) ENGINE=MergeTree ORDER BY idna; +INSERT INTO tab VALUES ('xn--') ('london.co.uk') ('ytraße.münchen.de') ('xn--tešla') ('microsoft.com') ('xn--'); +SELECT idna, idnaEncode(idna) AS ascii FROM tab; -- { serverError BAD_ARGUMENTS } +SELECT idna, tryIdnaEncode(idna) AS ascii, idnaDecode(ascii) AS original FROM tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference new file mode 100644 index 00000000000..ff05eaa72a3 --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.reference @@ -0,0 +1,55 @@ +-- Negative tests +-- Regular cases +a a- a a +A A- A A +-- --- -- -- +London London- London London +Lloyd-Atkinson Lloyd-Atkinson- Lloyd-Atkinson Lloyd-Atkinson +This has spaces This has spaces- This has spaces This has spaces +-> $1.00 <- -> $1.00 <-- -> $1.00 <- -> $1.00 <- +а 80a а а +ü tda ü ü +α mxa α α +例 fsq 例 例 +😉 n28h 😉 😉 +αβγ mxacd αβγ αβγ +München Mnchen-3ya München München +Mnchen-3ya Mnchen-3ya- Mnchen-3ya Mnchen-3ya +München-Ost Mnchen-Ost-9db München-Ost München-Ost +Bahnhof München-Ost Bahnhof Mnchen-Ost-u6b Bahnhof München-Ost 
Bahnhof München-Ost +abæcdöef abcdef-qua4k abæcdöef abæcdöef +правда 80aafi6cg правда правда +ยจฆฟคฏข 22cdfh1b8fsa ยจฆฟคฏข ยจฆฟคฏข +ドメイン名例 eckwd4c7cu47r2wf ドメイン名例 ドメイン名例 +MajiでKoiする5秒前 MajiKoi5-783gue6qz075azm5e MajiでKoiする5秒前 MajiでKoiする5秒前 +「bücher」 bcher-kva8445foa 「bücher」 「bücher」 +团淄 3bs854c 团淄 团淄 +-- Special cases +---- Empty input + + + +---- NULL input +\N +\N +\N +---- Garbage Punycode-encoded input + +---- Long input +Row 1: +────── +str: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +puny: Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa +original: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. 
Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +original_try: Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +---- Non-const values +München Mnchen-3ya München München +abc abc- abc abc +aäoöuü aou-qla5gqb aäoöuü aäoöuü +---- Non-const values with invalid values sprinkled in +Also no punycode +London- London +Mnchen-3ya München +No punycode +Rtting-3ya Rütting +XYZ no punycode diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql new file mode 100644 index 00000000000..b9bcf933641 --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.sql @@ -0,0 +1,86 @@ +-- Tags: no-fasttest +-- no-fasttest: requires idna library + +-- See also 02932_idna.sql + +SELECT '-- Negative tests'; + +SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryPunycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tryPunycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryPunycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } +SELECT tryPunycodeDecode(toFixedString('two', 3)); -- { serverError NOT_IMPLEMENTED } + +SELECT '-- Regular cases'; + +-- The test cases originate from the ada idna unit tests: +-- - https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt + +SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 
'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'α' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try; +-- +SELECT '-- Special cases'; + +SELECT '---- Empty input'; +SELECT punycodeEncode(''); +SELECT punycodeDecode(''); +SELECT tryPunycodeDecode(''); + +SELECT '---- NULL input'; +SELECT punycodeEncode(NULL); +SELECT punycodeDecode(NULL); +SELECT tryPunycodeDecode(NULL); + +SELECT '---- Garbage Punycode-encoded input'; +SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS } +SELECT tryPunycodeDecode('no punycode'); + +SELECT '---- Long input'; +SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... 
Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FORMAT Vertical; + +SELECT '---- Non-const values'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str; +INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München'); +SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original, tryPunycodeDecode(puny) AS original_try FROM tab; +DROP TABLE tab; + +SELECT '---- Non-const values with invalid values sprinkled in'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (puny String) ENGINE=MergeTree ORDER BY puny; +INSERT INTO tab VALUES ('Also no punycode') ('London-') ('Mnchen-3ya') ('No punycode') ('Rtting-3ya') ('XYZ no punycode'); +SELECT puny, punycodeDecode(puny) AS original FROM tab; -- { serverError BAD_ARGUMENTS } +SELECT puny, tryPunycodeDecode(puny) AS original FROM tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02933_paste_join.reference b/tests/queries/0_stateless/02933_paste_join.reference index 84ae5987926..5ff13917957 100644 --- a/tests/queries/0_stateless/02933_paste_join.reference +++ b/tests/queries/0_stateless/02933_paste_join.reference @@ -72,3 +72,13 @@ UInt64 UInt64 UInt64 UInt64 +0 9 +1 8 +2 7 +3 6 +4 5 +5 4 +6 3 +7 2 +8 1 +9 0 diff --git a/tests/queries/0_stateless/02933_paste_join.sql b/tests/queries/0_stateless/02933_paste_join.sql index 1c346438d77..b103bf72160 100644 --- a/tests/queries/0_stateless/02933_paste_join.sql +++ b/tests/queries/0_stateless/02933_paste_join.sql @@ -32,6 +32,6 @@ INSERT INTO t2 SELECT number, number FROM numbers(15, 15); SELECT * FROM ( SELECT * from t1 ) t1 PASTE JOIN ( SELECT * from t2 ) t2 SETTINGS max_threads = 1; SELECT toTypeName(a) FROM (SELECT number as a FROM numbers(11)) t1 PASTE JOIN (select number as a from numbers(10)) t2 SETTINGS join_use_nulls = 1; SET max_threads = 2; +select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=10; select * from (SELECT number as a FROM numbers(10)) t1 ANY PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } select * from (SELECT number as a FROM numbers(10)) t1 ALL PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } -select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=3; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02933_sqid.reference b/tests/queries/0_stateless/02933_sqid.reference index 4506cc4d01a..a559bacb0ac 100644 --- a/tests/queries/0_stateless/02933_sqid.reference +++ b/tests/queries/0_stateless/02933_sqid.reference @@ -1,13 +1,17 @@ -- negative tests -- const UInt* 
-Uk -XMbT -86Rf07 -Td1EnWQo +Uk [1] +XMbT [1,2] +86Rf07 [1,2,3] +Td1EnWQo [1,2,3,4] XMbT -- non-const UInt* -Uk +Uk [1] +XMbT [1,2] +86Rf07 [1,2,3] +Td1EnWQo [1,2,3,4] XMbT -86Rf07 -Td1EnWQo +-- invalid sqid +[] +-- alias XMbT diff --git a/tests/queries/0_stateless/02933_sqid.sql b/tests/queries/0_stateless/02933_sqid.sql index 3a2873e9c34..81d4b2bc35c 100644 --- a/tests/queries/0_stateless/02933_sqid.sql +++ b/tests/queries/0_stateless/02933_sqid.sql @@ -3,19 +3,27 @@ SET allow_suspicious_low_cardinality_types = 1; SELECT '-- negative tests'; -SELECT sqid(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT sqid('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT sqidEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT sqidDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT sqidEncode('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT sqidDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT '-- const UInt*'; -SELECT sqid(1); -SELECT sqid(1, 2); -SELECT sqid(1, 2, 3); -SELECT sqid(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64); -SELECT sqid(toNullable(1), toLowCardinality(2)); +SELECT sqidEncode(1) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(1, 2) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(1, 2, 3) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(toNullable(1), toLowCardinality(2)) AS sqid; SELECT '-- non-const UInt*'; -SELECT sqid(materialize(1)); -SELECT sqid(materialize(1), materialize(2)); -SELECT sqid(materialize(1), materialize(2), materialize(3)); -SELECT sqid(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64)); -SELECT sqid(toNullable(materialize(1)), toLowCardinality(materialize(2))); +SELECT sqidEncode(materialize(1)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(materialize(1), materialize(2)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(materialize(1), materialize(2), materialize(3)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64)) AS sqid, sqidDecode(sqid); +SELECT sqidEncode(toNullable(materialize(1)), toLowCardinality(materialize(2))); + +SELECT '-- invalid sqid'; +SELECT sqidDecode('invalid sqid'); + +SELECT '-- alias'; +SELECT sqid(1, 2); diff --git a/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.reference b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.reference new file mode 100644 index 00000000000..a13ab627f94 --- /dev/null +++ b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.reference @@ -0,0 +1,9 @@ +1 a 1 +2 a 1 +3 b 1 +4 b 1 +1 \N 1 +2 \N 1 +3 \N 1 +4 \N 1 +\N \N 4 diff --git a/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.sql b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.sql new file mode 100644 index 00000000000..03bb7f9e770 --- /dev/null +++ b/tests/queries/0_stateless/02943_exprs_order_in_group_by_with_rollup.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS test_group_by_with_rollup_order; + +CREATE TABLE test_group_by_with_rollup_order (id Int64, a Nullable(Int64), b Nullable(String)) ENGINE = MergeTree ORDER BY id; + +insert into test_group_by_with_rollup_order values(1,1,'a'); +insert into test_group_by_with_rollup_order values(2,2,'a'); +insert into test_group_by_with_rollup_order values(3,3,'b'); +insert into test_group_by_with_rollup_order values(4,4,'b'); + +SELECT 
toString(a) as r1, b, count() FROM test_group_by_with_rollup_order GROUP BY r1, b WITH ROLLUP ORDER BY b,r1; + +DROP TABLE IF EXISTS test_group_by_with_rollup_order; diff --git a/tests/queries/0_stateless/02946_format_values.reference b/tests/queries/0_stateless/02946_format_values.reference new file mode 100644 index 00000000000..90b2a3cb8ef --- /dev/null +++ b/tests/queries/0_stateless/02946_format_values.reference @@ -0,0 +1,141 @@ +INSERT INTO table1 VALUES (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td') +====================================== +SELECT a +FROM table1 +; + +INSERT INTO table1 VALUES (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td'); + +SELECT b +FROM table1 +; + +====================================== +-- begin +SELECT a +FROM table1 +; + +-- some insert query +INSERT INTO table1 VALUES (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td'); + +-- more comments +-- in a row +SELECT b +FROM table1 +; + +-- end +====================================== +SELECT b FROM table1; + +SELECT b, c FROM table1; + +SELECT + b, + c, + d +FROM table1 +; + +SELECT + b, + c, + d, + e +FROM table1 +; + +SELECT + b, + c, + d, + e, + f +FROM table1 +; + +SELECT + b, + c +FROM +( + SELECT + b, + c + FROM table1 +) +; + +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +; + +====================================== +SELECT b FROM table1; + +SELECT b, c FROM table1; + +SELECT b, c, d FROM table1; + +SELECT b, c, d, e FROM table1; + +SELECT b, c, d, e, f FROM table1; + +SELECT b, c FROM (SELECT b, c FROM table1); + +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +; + +====================================== +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +SELECT b, c, d, e, f FROM (SELECT b, c, d, e, f FROM table1) +====================================== +Option 'max_line_length' must be less than 256. +2 +Options 'oneline' and 'max_line_length' are mutually exclusive. +2 diff --git a/tests/queries/0_stateless/02946_format_values.sh b/tests/queries/0_stateless/02946_format_values.sh new file mode 100755 index 00000000000..36e32de42fa --- /dev/null +++ b/tests/queries/0_stateless/02946_format_values.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "insert into table1 values (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td')" | ${CLICKHOUSE_FORMAT} + +echo "======================================" + +cat <&1; echo $?; } +{ echo "select 1" | ${CLICKHOUSE_FORMAT} --comments --max_line_length=120 --oneline 2>&1; echo $?; } diff --git a/tests/queries/0_stateless/02947_dropped_tables_parts.reference b/tests/queries/0_stateless/02947_dropped_tables_parts.reference new file mode 100644 index 00000000000..086d55c3d93 --- /dev/null +++ b/tests/queries/0_stateless/02947_dropped_tables_parts.reference @@ -0,0 +1,2 @@ +default 02947_table_1 all_1_1_0 +default 02947_table_2 all_1_1_0 diff --git a/tests/queries/0_stateless/02947_dropped_tables_parts.sql b/tests/queries/0_stateless/02947_dropped_tables_parts.sql new file mode 100644 index 00000000000..554a19ca6b1 --- /dev/null +++ b/tests/queries/0_stateless/02947_dropped_tables_parts.sql @@ -0,0 +1,14 @@ + +DROP TABLE IF EXISTS 02947_table_1; +DROP TABLE IF EXISTS 02947_table_2; + +CREATE TABLE 02947_table_1 (id Int32) Engine=MergeTree() ORDER BY id; +CREATE TABLE 02947_table_2 (id Int32) Engine=MergeTree() ORDER BY id; +INSERT INTO 02947_table_1 VALUES (1),(2); +INSERT INTO 02947_table_2 VALUES (3),(4); + +SELECT database, table, name FROM system.parts WHERE database = currentDatabase() AND startsWith(table, '02947_table_'); +select * from system.dropped_tables_parts format Null; + +DROP TABLE 02947_table_1; +DROP TABLE 02947_table_2; diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference new file mode 100644 index 00000000000..d3a002c4fd4 --- /dev/null +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -0,0 +1,4 @@ +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.000 +2023-10-09 10:11:12.000 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql new file mode 100644 index 00000000000..178f21a9e63 --- /dev/null +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql @@ -0,0 +1,4 @@ +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000500', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000499', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(10)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.reference b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.reference new file mode 100644 index 00000000000..bbb479df203 --- /dev/null +++ b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.reference @@ -0,0 +1,10 @@ +(0,'2024-01-01') +(0,'2024-01-02') +(0,'2024-01-03') +(0,'2024-01-04') +(0,'2024-01-05') +(0,'2024-01-06') +(0,'2024-01-07') +(0,'2024-01-08') +(0,'2024-01-09') +(0,'2024-01-10') diff --git a/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.sql b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.sql new file mode 100644 index 00000000000..8836d10b8f8 --- /dev/null +++ b/tests/queries/0_stateless/02961_analyzer_low_cardinality_fuzzer.sql @@ -0,0 +1,19 @@ +set allow_suspicious_low_cardinality_types = true; + +CREATE TABLE 
test_tuple_filter__fuzz_2 +( + `id` Nullable(UInt32), + `value` LowCardinality(String), + `log_date` LowCardinality(Date) +) +ENGINE = MergeTree +PARTITION BY log_date +ORDER BY id +SETTINGS allow_nullable_key = 1; + +INSERT INTO test_tuple_filter__fuzz_2 SELECT number, toString(number), toDate('2024-01-01') + number FROM numbers(10); + +SELECT + (tuple(log_date) = tuple('2021-01-01'), log_date) +FROM test_tuple_filter__fuzz_2 +ORDER BY log_date; diff --git a/tests/queries/0_stateless/02961_output_format_compress_params.reference b/tests/queries/0_stateless/02961_output_format_compress_params.reference new file mode 100644 index 00000000000..d0752a77fc7 --- /dev/null +++ b/tests/queries/0_stateless/02961_output_format_compress_params.reference @@ -0,0 +1,2 @@ +1 +1000000 diff --git a/tests/queries/0_stateless/02961_output_format_compress_params.sh b/tests/queries/0_stateless/02961_output_format_compress_params.sh new file mode 100755 index 00000000000..7275f9a0b2b --- /dev/null +++ b/tests/queries/0_stateless/02961_output_format_compress_params.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: replica + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +file_with_random_postfix=test_02961_`date +%s%6N`.csv + +${CLICKHOUSE_CLIENT} --query "INSERT INTO FUNCTION file('${file_with_random_postfix}', 'CSV', 'x UInt64', 'zstd') SELECT number FROM numbers(1000000) SETTINGS output_format_compression_level = 10, output_format_compression_zstd_window_log = 30, engine_file_truncate_on_insert = 1;" +# Simple check that output_format_compression_zstd_window_log = 30 works +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM file('${file_with_random_postfix}', 'CSV', 'x UInt64', 'zstd') SETTINGS zstd_window_log_max = 29;" 2>&1 | head -n 1 | grep -c "ZSTD_DECODER_FAILED" +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM file('${file_with_random_postfix}', 'CSV', 'x UInt64', 'zstd') SETTINGS zstd_window_log_max = 30;" diff --git a/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql new file mode 100644 index 00000000000..669018a1308 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql @@ -0,0 +1,8 @@ +set count_distinct_optimization = 1; + +SELECT uniqExact('257') +FROM + (SELECT + number, CAST(number / 9223372036854775806, 'UInt64') AS m + FROM numbers(3) + ); diff --git a/tests/queries/0_stateless/02962_join_using_bug_57894.reference b/tests/queries/0_stateless/02962_join_using_bug_57894.reference new file mode 100644 index 00000000000..454655081df --- /dev/null +++ b/tests/queries/0_stateless/02962_join_using_bug_57894.reference @@ -0,0 +1,66 @@ +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +\N diff --git a/tests/queries/0_stateless/02962_join_using_bug_57894.sql b/tests/queries/0_stateless/02962_join_using_bug_57894.sql new file mode 100644 index 00000000000..87aef8b1a71 --- /dev/null +++ 
b/tests/queries/0_stateless/02962_join_using_bug_57894.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS r; +SET allow_suspicious_low_cardinality_types = 1; + +CREATE TABLE t (`x` UInt32, `s` LowCardinality(String)) ENGINE = Memory; +INSERT INTO t SELECT number, toString(number) FROM numbers(5); + +CREATE TABLE r (`x` LowCardinality(Nullable(UInt32)), `s` Nullable(String)) ENGINE = Memory; +INSERT INTO r SELECT number, toString(number) FROM numbers(2, 8); +INSERT INTO r VALUES (NULL, NULL); + +SET allow_experimental_analyzer = 0; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +; + + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'partial_merge'; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'full_sorting_merge'; + +SET allow_experimental_analyzer = 1; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'partial_merge'; + +SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SETTINGS join_algorithm = 'full_sorting_merge'; diff --git a/tests/queries/0_stateless/02962_max_joined_block_rows.reference b/tests/queries/0_stateless/02962_max_joined_block_rows.reference new file mode 100644 index 00000000000..8bc1bad225b --- /dev/null +++ b/tests/queries/0_stateless/02962_max_joined_block_rows.reference @@ -0,0 +1,32 @@ +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +-- +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +-- +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 diff --git a/tests/queries/0_stateless/02962_max_joined_block_rows.sql b/tests/queries/0_stateless/02962_max_joined_block_rows.sql new file mode 100644 index 00000000000..c31ab5e1132 --- /dev/null +++ b/tests/queries/0_stateless/02962_max_joined_block_rows.sql @@ -0,0 +1,38 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE table t1 (a UInt64, b UInt64) ENGINE = Memory; +INSERT INTO t1 SELECT number % 2, number FROM numbers(10); + +CREATE table t2 (a UInt64) ENGINE = Memory; + +INSERT INTO t2 SELECT number % 2 FROM numbers(10); + +-- block size is always a multiple of 5 because we have 5 rows for each key in the right table +-- we do not split rows corresponding to the same key + +SELECT max(bs) <= 5, b FROM ( + SELECT blockSize() as bs, * FROM t1 JOIN t2 ON t1.a = t2.a +) GROUP BY b +ORDER BY b +SETTINGS max_joined_block_size_rows = 5; + +SELECT '--'; + +SELECT max(bs) <= 10, b FROM ( + SELECT blockSize() as bs, * FROM t1 JOIN t2 ON t1.a = t2.a +) GROUP BY b +ORDER BY b +SETTINGS max_joined_block_size_rows = 10; + +SELECT '--'; + +-- parallel_hash doesn't support max_joined_block_size_rows + +SET join_algorithm = 'parallel_hash'; + +SELECT max(bs) > 10, b FROM ( + SELECT blockSize() as bs, * FROM t1 JOIN t2 ON t1.a = t2.a +) GROUP BY b +ORDER BY b +SETTINGS max_joined_block_size_rows = 10; diff --git a/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference new file mode 100644 index 00000000000..f18a39e191e --- /dev/null +++ b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference @@ -0,0 +1,18 @@ +sales 15000 +sales 15000 +sales 15000 +sales 29400 +sales 29400 +sales 29400 +sales 43800 +sales 43800 +sales 43800 +sales 15000 5000 +sales 15000 5000 +sales 15000 5000 +sales 29400 4800 +sales 29400 4800 +sales 29400 4800 +sales 43800 4800 +sales 43800 4800 +sales 43800 4800 diff --git 
a/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql new file mode 100644 index 00000000000..90af415c5ea --- /dev/null +++ b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql @@ -0,0 +1,32 @@ +CREATE TABLE empsalary +( + `depname` LowCardinality(String), + `empno` UInt64, + `salary` Int32, + `enroll_date` Date +) +ENGINE = Memory; + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + +-- 1 window function + +SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) AS depsalary +FROM empsalary +order by depsalary; + + +-- 2 window functions with different window, +-- but result should be the same for depsalary + +SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) AS depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) AS depminsalary +FROM empsalary +order by depsalary; diff --git a/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference b/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference new file mode 100644 index 00000000000..5816b4eb49b --- /dev/null +++ b/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference @@ -0,0 +1 @@ +3 4 diff --git a/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql b/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql new file mode 100644 index 00000000000..92f7cc0671c --- /dev/null +++ b/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql @@ -0,0 +1,9 @@ +drop table if exists a; + +create table a (i int, j int, projection p (select * order by j)) engine MergeTree partition by i order by tuple() settings index_granularity = 1; + +insert into a values (1, 2), (0, 5), (3, 4); + +select * from a where i > 0 and j = 4 settings force_index_by_date = 1; + +drop table a; diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index d1f868eba97..484dd46a90c 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -1,173 +1,98 @@ +#include +#include +#include #include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static DB::MySQLCharsetPtr charset = std::make_shared(); -static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( - DB::MySQLReplication::EventHeader & header, DB::ReadBuffer & payload, - std::shared_ptr & last_table_map_event, bool exist_checksum) +bool quit = false; +void signal_handler(int) { + quit = true; +} + +static void processBinlogFromFile(const std::string & bin_path, bool disable_checksum) +{ + DB::MySQLReplication::BinlogFromFile binlog; + binlog.open(bin_path); + binlog.setChecksum(disable_checksum ? 
DB::MySQLReplication::IBinlog::NONE : DB::MySQLReplication::IBinlog::CRC32); + DB::MySQLReplication::BinlogEventPtr event; - DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, - /* trow_exception */ false, /* exact_limit */ std::nullopt); - DB::ReadBufferPtr event_payload = std::make_shared(*limit_read_buffer, exist_checksum ? 4 : 0); - - switch (header.type) + while (binlog.tryReadEvent(event, /*timeout*/ 0) && !quit) { - case DB::MySQLReplication::FORMAT_DESCRIPTION_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::ROTATE_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::QUERY_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - - auto query = std::static_pointer_cast(event); - switch (query->typ) - { - case DB::MySQLReplication::QUERY_EVENT_MULTI_TXN_FLAG: - case DB::MySQLReplication::QUERY_EVENT_XA: - { - event = std::make_shared(std::move(query->header)); - break; - } - default: - break; - } - break; - } - case DB::MySQLReplication::XID_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::TABLE_MAP_EVENT: - { - DB::MySQLReplication::TableMapEventHeader map_event_header; - map_event_header.parse(*event_payload); - event = std::make_shared(std::move(header), map_event_header, charset); - event->parseEvent(*event_payload); - last_table_map_event = std::static_pointer_cast(event); - break; - } - case DB::MySQLReplication::WRITE_ROWS_EVENT_V1: - case DB::MySQLReplication::WRITE_ROWS_EVENT_V2: - { - DB::MySQLReplication::RowsEventHeader rows_header(header.type); - rows_header.parse(*event_payload); - event = std::make_shared(last_table_map_event, std::move(header), rows_header); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::DELETE_ROWS_EVENT_V1: - case DB::MySQLReplication::DELETE_ROWS_EVENT_V2: - { - DB::MySQLReplication::RowsEventHeader rows_header(header.type); - rows_header.parse(*event_payload); - event = std::make_shared(last_table_map_event, std::move(header), rows_header); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::UPDATE_ROWS_EVENT_V1: - case DB::MySQLReplication::UPDATE_ROWS_EVENT_V2: - { - DB::MySQLReplication::RowsEventHeader rows_header(header.type); - rows_header.parse(*event_payload); - event = std::make_shared(last_table_map_event, std::move(header), rows_header); - event->parseEvent(*event_payload); - break; - } - case DB::MySQLReplication::GTID_EVENT: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } - default: - { - event = std::make_shared(std::move(header)); - event->parseEvent(*event_payload); - break; - } + DB::WriteBufferFromOStream cout(std::cout); + event->dump(cout); + binlog.getPosition().dump(cout); + cout.finalize(); } - - return event; } -static int checkBinLogFile(const std::string & bin_path, bool exist_checksum) +static void processBinlogFromSocket(const std::string & host, int port, const std::string & user, const std::string & password, const std::string & executed_gtid_set, bool disable_checksum) { - DB::ReadBufferFromFile in(bin_path); - DB::assertString("\xfe\x62\x69\x6e", in); /// magic number + DB::MySQLReplication::BinlogFromSocket binlog; + binlog.setChecksum(disable_checksum ? 
DB::MySQLReplication::IBinlog::NONE : DB::MySQLReplication::IBinlog::CRC32); - DB::MySQLReplication::BinlogEventPtr last_event; - std::shared_ptr last_header; - std::shared_ptr table_map; + binlog.connect(host, port, user, password); + binlog.start(/*unique number*/ 42, executed_gtid_set); + DB::MySQLReplication::BinlogEventPtr event; - try + while (!quit) { - while (!in.eof()) + if (binlog.tryReadEvent(event, /*timeout*/ 100)) { - last_header = std::make_shared(); - last_header->parse(in); - last_event = parseSingleEventBody(*last_header, in, table_map, exist_checksum); + if (event->header.type != DB::MySQLReplication::HEARTBEAT_EVENT) + { + DB::WriteBufferFromOStream cout(std::cout); + event->dump(cout); + binlog.getPosition().dump(cout); + cout.finalize(); + } } } - catch (...) - { - DB::WriteBufferFromOStream cerr(std::cerr); - cerr << "Unable to parse MySQL binlog event. Code: " << DB::getCurrentExceptionCode() << ", Exception message: " - << DB::getCurrentExceptionMessage(false) << '\n' << ", Previous event: " << '\n'; - last_event->dump(cerr); - cerr << '\n' << ", Event header: " << '\n'; - last_header->dump(cerr); - cerr << '\n'; - return DB::getCurrentExceptionCode(); - } - - DB::WriteBufferFromOStream cout(std::cout); - cout << "Check passed. " << '\n' << "No exception was thrown." << '\n' << "The last binlog event: " << '\n'; - last_event->dump(cout); - cout << '\n'; - return 0; } - int main(int argc, char ** argv) { + (void)signal(SIGINT, signal_handler); boost::program_options::options_description desc("Allowed options"); - desc.add_options()("help,h", "Produce help message"); - desc.add_options()("disable_checksum", "Disable checksums in binlog files."); - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + std::string host = "127.0.0.1"; + int port = 3306; + std::string user = "root"; + std::string password; + std::string gtid; - if (options.count("help") || argc < 2) + desc.add_options() + ("help", "Produce help message") + ("disable_checksum", "Disable checksums in binlog files.") + ("binlog", boost::program_options::value(), "Binlog file") + ("host", boost::program_options::value(&host)->default_value(host), "Host to connect") + ("port", boost::program_options::value(&port)->default_value(port), "Port number to connect") + ("user", boost::program_options::value(&user)->default_value(user), "User") + ("password", boost::program_options::value(&password), "Password") + ("gtid", boost::program_options::value(>id), "Executed gtid set"); + + try { - std::cout << "Usage: " << argv[0] << " mysql_binlog_file" << std::endl; - std::cout << desc << std::endl; - return 1; + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + boost::program_options::notify(options); + + if (options.count("help") || (!options.count("binlog") && !options.count("gtid"))) + { + std::cout << "Usage: " << argv[0] << std::endl; + std::cout << desc << std::endl; + return EXIT_FAILURE; + } + + if (options.count("binlog")) + processBinlogFromFile(options["binlog"].as(), options.count("disable_checksum")); + else + processBinlogFromSocket(host, port, user, password, gtid, options.count("disable_checksum")); + } + catch (std::exception & ex) + { + std::cerr << ex.what() << std::endl; + return EXIT_FAILURE; } - return checkBinLogFile(argv[argc - 1], !options.count("disable_checksum")); + return EXIT_SUCCESS; } diff 
--git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 08a9f355b10..95a6ae50d1f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -198,6 +198,7 @@ DELETEs DESC DIEs DOGEFI +Damerau DataGrip DataLens DataTime @@ -344,6 +345,7 @@ Hypot IANA IDE IDEs +IDNA IMDS INFILE INSERTed @@ -413,6 +415,7 @@ JSONType JSONs Jaeger Jannis +Jaro JavaHash Jemalloc Jepsen @@ -701,8 +704,6 @@ PrettySpaceMonoBlock PrettySpaceNoEscapes PrettySpaceNoEscapesMonoBlock Prewhere -TotalPrimaryKeyBytesInMemory -TotalPrimaryKeyBytesInMemoryAllocated PrivateKeyPassphraseHandler ProfileEvents Profiler @@ -714,8 +715,10 @@ Promtail Protobuf ProtobufSingle ProxySQL +Punycode PyArrow PyCharm +QATlib QEMU QTCreator Quantile @@ -725,6 +728,7 @@ QueryCacheHits QueryCacheMisses QueryPreempted QueryThread +QuickAssist QuoteMeta RBAC RClickHouse @@ -913,6 +917,7 @@ TimescaleDB's Timeunit TinyLog Tkachenko +ToASCII ToCenterChild ToChildren ToGeo @@ -921,10 +926,13 @@ ToIPv ToParent ToSnowflake ToString +ToUnicode Toolset TopK TotalBytesOfMergeTreeTables TotalPartsOfMergeTreeTables +TotalPrimaryKeyBytesInMemory +TotalPrimaryKeyBytesInMemoryAllocated TotalRowsOfMergeTreeTables TotalTemporaryFiles Tradeoff @@ -977,6 +985,7 @@ Werror Wether WikiStat WindowView +Winkler WithNames WithNamesAndTypes WordNet @@ -993,6 +1002,7 @@ YYYYMMDDToDate YYYYMMDDhhmmssToDateTime Yandex Yasm +ZSTDQAT Zabbix Zipkin ZooKeeper @@ -1339,6 +1349,7 @@ cutToFirstSignificantSubdomainWithWWW cutURLParameter cutWWW cyrus +damerauLevenshteinDistance datacenter datacenters datafiles @@ -1452,6 +1463,7 @@ endian endianness endsWith endsWithUTF +endswith enqueued enum enum's @@ -1624,6 +1636,8 @@ hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull +hasall +hasany hashtables haversine hdbc @@ -1652,6 +1666,8 @@ hyvor icosahedron icudata idempotency +idnaDecode +idnaEncode ifNotFinite ifNull iframe @@ -1696,6 +1712,8 @@ isZeroOrNull iteratively jaccard jaccardIndex +jaroSimilarity +jaroWinklerSimilarity javaHash javaHashUTF jbod @@ -1791,6 +1809,7 @@ llvm loadDefaultCAFile localhost localread +loess logTrace logagent loghouse @@ -1849,14 +1868,14 @@ metrica metroHash mfedotov minMap +minSampleSizeContinuous +minSampleSizeConversion mindsdb minimalistic mininum miniselect minmap minmax -minSampleSizeContinuous -minSampleSizeConversion mins misconfiguration mispredictions @@ -1886,6 +1905,7 @@ multidirectory multiline multiplyDecimal multipolygon +multisearchany multisets multithread multiword @@ -1938,7 +1958,9 @@ notEquals notILike notIn notLike +notempty notequals +notlike notretry nowInBlock ntile @@ -2076,6 +2098,8 @@ pseudorandom pseudorandomize psql ptrs +punycodeDecode +punycodeEncode pushdown pwrite py @@ -2239,6 +2263,7 @@ seektable sequenceCount sequenceMatch sequenceNextNode +seriesDecomposeSTL seriesPeriodDetectFFT serverTimeZone serverTimezone @@ -2284,6 +2309,8 @@ splitByString splitByWhitespace splitby sqid +sqidDecode +sqidEncode sql sqlalchemy sqlinsert @@ -2294,6 +2321,7 @@ stacktrace stacktraces startsWith startsWithUTF +startswith statbox stateful stddev @@ -2522,6 +2550,8 @@ trimRight trunc tryBase tryDecrypt +tryIdnaEncode +tryPunycodeDecode tskv tsv tui diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 88b43afff26..daee2e7fb00 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -420,6 +420,9 @@ find 
$ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep - # Check for bad punctuation: whitespace before comma. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'" +# Check usage of std::regex which is too bloated and slow. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number 'std::regex' | grep -P '.' && echo "^ Please use re2 instead of std::regex" + # Cyrillic characters hiding inside Latin. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place."
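The new check-style rule above only greps for std::regex and asks developers to use re2 instead. As a minimal sketch of such a replacement (not taken from this patch; the function name and pattern below are illustrative, and it assumes the translation unit links against re2):

#include <re2/re2.h>
#include <string>

// Extract the first run of digits from a string, e.g. "all_1_1_0" -> "1".
// Equivalent std::regex code would build a std::regex("(\\d+)") and call std::regex_search.
bool firstNumber(const std::string & s, std::string & out)
{
    // RE2::PartialMatch compiles the pattern on every call here; long-lived code
    // would normally keep a static RE2 object to avoid recompilation.
    return RE2::PartialMatch(s, "(\\d+)", &out);
}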