Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-12-01 20:12:02 +00:00

Merge remote-tracking branch 'rschu1ze/master' into qc_isolation

This commit is contained in: commit 36173bbb7b

.gitmodules (vendored): 6 lines changed
@@ -245,6 +245,12 @@
 [submodule "contrib/idxd-config"]
     path = contrib/idxd-config
     url = https://github.com/intel/idxd-config
+[submodule "contrib/QAT-ZSTD-Plugin"]
+    path = contrib/QAT-ZSTD-Plugin
+    url = https://github.com/intel/QAT-ZSTD-Plugin
+[submodule "contrib/qatlib"]
+    path = contrib/qatlib
+    url = https://github.com/intel/qatlib
 [submodule "contrib/wyhash"]
     path = contrib/wyhash
     url = https://github.com/wangyi-fudan/wyhash
@@ -33,7 +33,7 @@ curl https://clickhouse.com/ | sh
 ## Upcoming Events

-Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
+Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com.

 ## Recent Recordings
 * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
contrib/CMakeLists.txt (vendored): 26 lines changed
@@ -172,9 +172,9 @@ add_contrib (s2geometry-cmake s2geometry)
 add_contrib (c-ares-cmake c-ares)

 if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42)
-    option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES})
+    option (ENABLE_QPL "Enable Intel® Query Processing Library (QPL)" ${ENABLE_LIBRARIES})
 elseif(ENABLE_QPL)
-    message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with SSE 4.2 or higher")
+    message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 with SSE 4.2 or higher")
 endif()
 if (ENABLE_QPL)
     add_contrib (idxd-config-cmake idxd-config)

@@ -183,6 +183,28 @@ else()
     message(STATUS "Not using QPL")
 endif ()

+if (OS_LINUX AND ARCH_AMD64)
+    option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES})
+elseif(ENABLE_QATLIB)
+    message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64")
+endif()
+if (ENABLE_QATLIB)
+    option (ENABLE_QAT_USDM_DRIVER "A User Space DMA-able Memory (USDM) component which allocates/frees DMA-able memory" OFF)
+    option (ENABLE_QAT_OUT_OF_TREE_BUILD "Using out-of-tree driver, user needs to customize ICP_ROOT variable" OFF)
+    set(ICP_ROOT "" CACHE STRING "ICP_ROOT variable to define the path of out-of-tree driver package")
+    if (ENABLE_QAT_OUT_OF_TREE_BUILD)
+        if (ICP_ROOT STREQUAL "")
+            message(FATAL_ERROR "Please define the path of out-of-tree driver package with -DICP_ROOT=xxx or disable out-of-tree build with -DENABLE_QAT_OUT_OF_TREE_BUILD=OFF; \
+            If you want out-of-tree build but have no package available, please download and build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html")
+        endif ()
+    else()
+        add_contrib (qatlib-cmake qatlib) # requires: isa-l
+    endif ()
+    add_contrib (QAT-ZSTD-Plugin-cmake QAT-ZSTD-Plugin)
+else()
+    message(STATUS "Not using QATLib")
+endif ()

 add_contrib (morton-nd-cmake morton-nd)
 if (ARCH_S390X)
     add_contrib(crc32-s390x-cmake crc32-s390x)
contrib/NuRaft (vendored submodule): 2 lines changed
@@ -1 +1 @@
-Subproject commit 2f5f52c4d8c87c2a3a3d101ca3a0194c9b77526f
+Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04
contrib/QAT-ZSTD-Plugin (new vendored submodule): 1 line added
@@ -0,0 +1 @@
+Subproject commit e5a134e12d2ea8a5b0f3b83c5b1c325fda4eb0a8
contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt (new file): 85 lines added
@@ -0,0 +1,85 @@
# Intel® QuickAssist Technology ZSTD Plugin (QAT ZSTD Plugin) is a plugin to Zstandard*(ZSTD*) for accelerating compression by QAT.
# ENABLE_QAT_OUT_OF_TREE_BUILD = 1 means kernel don't have native support, user will build and install driver from external package: https://www.intel.com/content/www/us/en/download/765501.html
# meanwhile, user need to set ICP_ROOT environment variable which point to the root directory of QAT driver source tree.
# ENABLE_QAT_OUT_OF_TREE_BUILD = 0 means kernel has built-in qat driver, QAT-ZSTD-PLUGIN just has dependency on qatlib.

if (ENABLE_QAT_OUT_OF_TREE_BUILD)
    message(STATUS "Intel QATZSTD out-of-tree build, ICP_ROOT:${ICP_ROOT}")

    set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src")
    set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c")
    set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
    set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include")
    set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc")
    set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include")
    set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv")
    set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so")
    set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so")
    if (ENABLE_QAT_USDM_DRIVER)
        add_definitions(-DENABLE_USDM_DRV)
    endif()
    add_library(_qatzstd_plugin ${QATZSTD_SRC})
    target_link_libraries (_qatzstd_plugin PUBLIC ${USDM_LIBRARY} ${QAT_S_LIBRARY})
    target_include_directories(_qatzstd_plugin
        SYSTEM PUBLIC "${QATZSTD_SRC_DIR}"
        PRIVATE ${QAT_INCLUDE_DIR}
            ${QAT_DC_INCLUDE_DIR}
            ${QAT_AL_INCLUDE_DIR}
            ${QAT_USDM_INCLUDE_DIR}
            ${ZSTD_LIBRARY_DIR})
    target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC)
    add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin)
else () # In-tree build
    message(STATUS "Intel QATZSTD in-tree build")
    set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src")
    set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c")
    set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")

    # please download&build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html
    set(ICP_ROOT "${ClickHouse_SOURCE_DIR}/contrib/qatlib")
    set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include")
    set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc")
    set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include")
    set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv")
    set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so")
    set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so")
    set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib")
    set(LIBQAT_HEADER_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")

    file(MAKE_DIRECTORY
        "${LIBQAT_HEADER_DIR}/qat"
    )
    file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/cpa.h"
        DESTINATION "${LIBQAT_HEADER_DIR}/qat/"
    )
    file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/dc/cpa_dc.h"
        DESTINATION "${LIBQAT_HEADER_DIR}/qat/"
    )
    file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_poll.h"
        DESTINATION "${LIBQAT_HEADER_DIR}/qat/"
    )
    file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_user.h"
        DESTINATION "${LIBQAT_HEADER_DIR}/qat/"
    )
    file(COPY "${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv/qae_mem.h"
        DESTINATION "${LIBQAT_HEADER_DIR}/qat/"
    )

    if (ENABLE_QAT_USDM_DRIVER)
        add_definitions(-DENABLE_USDM_DRV)
    endif()

    add_library(_qatzstd_plugin ${QATZSTD_SRC})
    target_link_libraries (_qatzstd_plugin PUBLIC ch_contrib::qatlib ch_contrib::usdm)
    target_include_directories(_qatzstd_plugin PRIVATE
        ${QAT_INCLUDE_DIR}
        ${QAT_DC_INCLUDE_DIR}
        ${QAT_AL_INCLUDE_DIR}
        ${QAT_USDM_INCLUDE_DIR}
        ${ZSTD_LIBRARY_DIR}
        ${LIBQAT_HEADER_DIR})
    target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE)
    target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $<BUILD_INTERFACE:${QATZSTD_SRC_DIR}> $<INSTALL_INTERFACE:include>)
    add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin)
endif ()
contrib/qatlib (new vendored submodule): 1 line added
@@ -0,0 +1 @@
+Subproject commit abe15d7bfc083117bfbb4baee0b49ffcd1c03c5c
contrib/qatlib-cmake/CMakeLists.txt (new file): 213 lines added
@@ -0,0 +1,213 @@
# Intel® QuickAssist Technology Library (QATlib).

message(STATUS "Intel QATlib ON")
set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib")
set(LIBQAT_DIR "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src")
set(LIBOSAL_DIR "${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src")
set(OPENSSL_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl")

# Build 3 libraries: _qatmgr, _osal, _qatlib
# Produce ch_contrib::qatlib by linking these libraries.

# _qatmgr

SET(LIBQATMGR_sources ${LIBQAT_DIR}/qat_direct/vfio/qat_mgr_client.c
    ${LIBQAT_DIR}/qat_direct/vfio/qat_mgr_lib.c
    ${LIBQAT_DIR}/qat_direct/vfio/qat_log.c
    ${LIBQAT_DIR}/qat_direct/vfio/vfio_lib.c
    ${LIBQAT_DIR}/qat_direct/vfio/adf_pfvf_proto.c
    ${LIBQAT_DIR}/qat_direct/vfio/adf_pfvf_vf_msg.c
    ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_pf.c)

add_library(_qatmgr ${LIBQATMGR_sources})

target_include_directories(_qatmgr PRIVATE
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/vfio
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include
    ${LIBQAT_ROOT_DIR}/quickassist/include
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include
    ${LIBQAT_ROOT_DIR}/quickassist/qat/drivers/crypto/qat/qat_common
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/common/include
    ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include)

target_compile_definitions(_qatmgr PRIVATE -DUSER_SPACE)
target_compile_options(_qatmgr PRIVATE -Wno-error=int-conversion)

# _osal

SET(LIBOSAL_sources
    ${LIBOSAL_DIR}/linux/user_space/OsalSemaphore.c
    ${LIBOSAL_DIR}/linux/user_space/OsalThread.c
    ${LIBOSAL_DIR}/linux/user_space/OsalMutex.c
    ${LIBOSAL_DIR}/linux/user_space/OsalSpinLock.c
    ${LIBOSAL_DIR}/linux/user_space/OsalAtomic.c
    ${LIBOSAL_DIR}/linux/user_space/OsalServices.c
    ${LIBOSAL_DIR}/linux/user_space/OsalUsrKrnProxy.c
    ${LIBOSAL_DIR}/linux/user_space/OsalCryptoInterface.c)

add_library(_osal ${LIBOSAL_sources})

target_include_directories(_osal PRIVATE
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include
    ${OPENSSL_DIR}/include
    ${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_x86_64/include
    ${ClickHouse_SOURCE_DIR}/contrib/qatlib-cmake/include)

target_compile_definitions(_osal PRIVATE -DOSAL_ENSURE_ON -DUSE_OPENSSL)

# _qatlib
SET(LIBQAT_sources
    ${LIBQAT_DIR}/common/compression/dc_buffers.c
    ${LIBQAT_DIR}/common/compression/dc_chain.c
    ${LIBQAT_DIR}/common/compression/dc_datapath.c
    ${LIBQAT_DIR}/common/compression/dc_dp.c
    ${LIBQAT_DIR}/common/compression/dc_header_footer.c
    ${LIBQAT_DIR}/common/compression/dc_header_footer_lz4.c
    ${LIBQAT_DIR}/common/compression/dc_session.c
    ${LIBQAT_DIR}/common/compression/dc_stats.c
    ${LIBQAT_DIR}/common/compression/dc_err_sim.c
    ${LIBQAT_DIR}/common/compression/dc_ns_datapath.c
    ${LIBQAT_DIR}/common/compression/dc_ns_header_footer.c
    ${LIBQAT_DIR}/common/compression/dc_crc32.c
    ${LIBQAT_DIR}/common/compression/dc_crc64.c
    ${LIBQAT_DIR}/common/compression/dc_xxhash32.c
    ${LIBQAT_DIR}/common/compression/icp_sal_dc_err_sim.c
    ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_control_path.c
    ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_data_path.c
    ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_interface_check.c
    ${LIBQAT_DIR}/common/crypto/asym/diffie_hellman/lac_dh_stats.c
    ${LIBQAT_DIR}/common/crypto/asym/dsa/lac_dsa.c
    ${LIBQAT_DIR}/common/crypto/asym/dsa/lac_dsa_interface_check.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_common.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_montedwds.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ec_nist_curves.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecdh.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecdsa.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_ecsm2.c
    ${LIBQAT_DIR}/common/crypto/asym/ecc/lac_kpt_ecdsa.c
    ${LIBQAT_DIR}/common/crypto/asym/large_number/lac_ln.c
    ${LIBQAT_DIR}/common/crypto/asym/large_number/lac_ln_interface_check.c
    ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_mmp.c
    ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_qat_comms.c
    ${LIBQAT_DIR}/common/crypto/asym/pke_common/lac_pke_utils.c
    ${LIBQAT_DIR}/common/crypto/asym/prime/lac_prime.c
    ${LIBQAT_DIR}/common/crypto/asym/prime/lac_prime_interface_check.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_control_path.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_decrypt.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_encrypt.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_interface_check.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_keygen.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_rsa_stats.c
    ${LIBQAT_DIR}/common/crypto/asym/rsa/lac_kpt_rsa_decrypt.c
    ${LIBQAT_DIR}/common/crypto/sym/drbg/lac_sym_drbg_api.c
    ${LIBQAT_DIR}/common/crypto/sym/key/lac_sym_key.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_alg_chain.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_api.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_auth_enc.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_cb.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_cipher.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_compile_check.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_dp.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_hash.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_partial.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_queue.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_stats.c
    ${LIBQAT_DIR}/common/crypto/sym/nrbg/lac_sym_nrbg_api.c
    ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat.c
    ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_cipher.c
    ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_constants_table.c
    ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_hash.c
    ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_hash_defs_lookup.c
    ${LIBQAT_DIR}/common/crypto/sym/qat/lac_sym_qat_key.c
    ${LIBQAT_DIR}/common/crypto/sym/lac_sym_hash_sw_precomputes.c
    ${LIBQAT_DIR}/common/crypto/kpt/provision/lac_kpt_provision.c
    ${LIBQAT_DIR}/common/ctrl/sal_compression.c
    ${LIBQAT_DIR}/common/ctrl/sal_create_services.c
    ${LIBQAT_DIR}/common/ctrl/sal_ctrl_services.c
    ${LIBQAT_DIR}/common/ctrl/sal_list.c
    ${LIBQAT_DIR}/common/ctrl/sal_crypto.c
    ${LIBQAT_DIR}/common/ctrl/sal_dc_chain.c
    ${LIBQAT_DIR}/common/ctrl/sal_instances.c
    ${LIBQAT_DIR}/common/qat_comms/sal_qat_cmn_msg.c
    ${LIBQAT_DIR}/common/utils/lac_buffer_desc.c
    ${LIBQAT_DIR}/common/utils/lac_log_message.c
    ${LIBQAT_DIR}/common/utils/lac_mem.c
    ${LIBQAT_DIR}/common/utils/lac_mem_pools.c
    ${LIBQAT_DIR}/common/utils/lac_sw_responses.c
    ${LIBQAT_DIR}/common/utils/lac_sync.c
    ${LIBQAT_DIR}/common/utils/sal_service_state.c
    ${LIBQAT_DIR}/common/utils/sal_statistics.c
    ${LIBQAT_DIR}/common/utils/sal_misc_error_stats.c
    ${LIBQAT_DIR}/common/utils/sal_string_parse.c
    ${LIBQAT_DIR}/common/utils/sal_user_process.c
    ${LIBQAT_DIR}/common/utils/sal_versions.c
    ${LIBQAT_DIR}/common/device/sal_dev_info.c
    ${LIBQAT_DIR}/user/sal_user.c
    ${LIBQAT_DIR}/user/sal_user_dyn_instance.c
    ${LIBQAT_DIR}/qat_direct/common/adf_process_proxy.c
    ${LIBQAT_DIR}/qat_direct/common/adf_user_cfg.c
    ${LIBQAT_DIR}/qat_direct/common/adf_user_device.c
    ${LIBQAT_DIR}/qat_direct/common/adf_user_dyn.c
    ${LIBQAT_DIR}/qat_direct/common/adf_user_ETring_mgr_dp.c
    ${LIBQAT_DIR}/qat_direct/common/adf_user_init.c
    ${LIBQAT_DIR}/qat_direct/common/adf_user_ring.c
    ${LIBQAT_DIR}/qat_direct/common/adf_user_transport_ctrl.c
    ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_cfg.c
    ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_ring.c
    ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_user_bundles.c
    ${LIBQAT_DIR}/qat_direct/vfio/adf_vfio_user_proxy.c
    ${LIBQAT_DIR}/common/compression/dc_crc_base.c)

add_library(_qatlib ${LIBQAT_sources})

target_include_directories(_qatlib PRIVATE
    ${CMAKE_SYSROOT}/usr/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/include
    ${LIBOSAL_DIR}/linux/user_space/include
    ${LIBQAT_ROOT_DIR}/quickassist/include
    ${LIBQAT_ROOT_DIR}/quickassist/include/lac
    ${LIBQAT_ROOT_DIR}/quickassist/include/dc
    ${LIBQAT_ROOT_DIR}/quickassist/qat/drivers/crypto/qat/qat_common
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/compression/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/crypto/sym/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/crypto/asym/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/firmware/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/common/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/common/include
    ${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/src/qat_direct/vfio
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space
    ${LIBQAT_ROOT_DIR}/quickassist/utilities/osal/src/linux/user_space/include
    ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include)

target_link_libraries(_qatlib PRIVATE _qatmgr _osal OpenSSL::SSL ch_contrib::isal)
target_compile_definitions(_qatlib PRIVATE -DUSER_SPACE -DLAC_BYTE_ORDER=__LITTLE_ENDIAN -DOSAL_ENSURE_ON)
target_link_options(_qatlib PRIVATE -pie -z relro -z now -z noexecstack)
target_compile_options(_qatlib PRIVATE -march=native)
add_library (ch_contrib::qatlib ALIAS _qatlib)

# _usdm

set(LIBUSDM_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib/quickassist/utilities/libusdm_drv")
set(LIBUSDM_sources
    ${LIBUSDM_DIR}/user_space/vfio/qae_mem_utils_vfio.c
    ${LIBUSDM_DIR}/user_space/qae_mem_utils_common.c
    ${LIBUSDM_DIR}/user_space/vfio/qae_mem_hugepage_utils_vfio.c)

add_library(_usdm ${LIBUSDM_sources})

target_include_directories(_usdm PRIVATE
    ${ClickHouse_SOURCE_DIR}/contrib/sysroot/linux-x86_64-musl/include
    ${LIBUSDM_DIR}
    ${LIBUSDM_DIR}/include
    ${LIBUSDM_DIR}/user_space)

add_library (ch_contrib::usdm ALIAS _usdm)
contrib/qatlib-cmake/include/mqueue.h (new file): 14 lines added
@@ -0,0 +1,14 @@
/* This is a workaround for a build conflict issue
   1. __GLIBC_PREREQ (referenced in OsalServices.c) is only defined in './sysroot/linux-x86_64/include/features.h'
   2. mqueue.h only exist under './sysroot/linux-x86_64-musl/'
   This cause target_include_directories for _osal has a conflict between './sysroot/linux-x86_64/include' and './sysroot/linux-x86_64-musl/'
   hence create mqueue.h separately under ./qatlib-cmake/include as an alternative.
*/

/* Major and minor version number of the GNU C library package. Use
   these macros to test for features in specific releases. */
#define __GLIBC__ 2
#define __GLIBC_MINOR__ 27

#define __GLIBC_PREREQ(maj, min) \
    ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))
@@ -41,6 +41,10 @@ readarray -t DISKS_PATHS < <(clickhouse extract-from-config --config-file "$CLIC
 readarray -t DISKS_METADATA_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.metadata_path' || true)

 CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
+CLICKHOUSE_PASSWORD_FILE="${CLICKHOUSE_PASSWORD_FILE:-}"
+if [[ -n "${CLICKHOUSE_PASSWORD_FILE}" && -f "${CLICKHOUSE_PASSWORD_FILE}" ]]; then
+    CLICKHOUSE_PASSWORD="$(cat "${CLICKHOUSE_PASSWORD_FILE}")"
+fi
 CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
 CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
 CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}"
@@ -44,6 +44,9 @@ if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TR
 # It is not needed, we will explicitly create tables on s3.
 # We do not have statefull tests with s3 storage run in public repository, but this is needed for another repository.
 rm /etc/clickhouse-server/config.d/s3_storage_policy_for_merge_tree_by_default.xml
+
+rm /etc/clickhouse-server/config.d/storage_metadata_with_full_object_key.xml
+rm /etc/clickhouse-server/config.d/s3_storage_policy_with_template_object_key.xml
 fi

 function start()

@@ -193,6 +193,7 @@ stop
 # Let's enable S3 storage by default
 export USE_S3_STORAGE_FOR_MERGE_TREE=1
+export $RANDOMIZE_OBJECT_KEY_TYPE=1
 export ZOOKEEPER_FAULT_INJECTION=1
 configure
@@ -11,7 +11,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec

 ``` sql
 CREATE TABLE s3_queue_engine_table (name String, value UInt32)
-    ENGINE = S3Queue(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
+    ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression])
     [SETTINGS]
     [mode = 'unordered',]
     [after_processing = 'keep',]
@@ -1143,6 +1143,8 @@ Optional parameters:
 - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
 - `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
 - `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
+- `key_template` — Defines the format with which the object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix. That suffix is a directory with 3 random symbols and a file name with 29 random symbols. With this option you have full control over how the object keys are generated. Some usage scenarios require random symbols in the prefix or in the middle of the object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only a subset of the syntax is supported, so check that your preferred format is supported before using this option. The disk isn't initialized if ClickHouse is unable to generate a key from the value of `key_template`. It requires the feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key) to be enabled. It forbids declaring the `root path` in the `endpoint` option and requires the option `key_compatibility_prefix` to be defined.
+- `key_compatibility_prefix` — This option is required when `key_template` is in use. To be able to read object keys that were stored in metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.

 ### Configuring the cache
docs/en/operations/settings/mysql-binlog-client.md (new file): 176 lines added
@@ -0,0 +1,176 @@
# The MySQL Binlog Client

The MySQL Binlog Client provides a mechanism in ClickHouse to share the binlog from a MySQL instance among multiple [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) databases. This avoids consuming unnecessary bandwidth and CPU when replicating more than one schema/database.

The implementation is resilient against crashes and disk issues. The executed GTID sets of the binlog itself and of the consuming databases are persisted only after the data they describe has been safely persisted as well. The implementation also tolerates re-doing aborted operations (at-least-once delivery).

# Settings

## use_binlog_client

Forces reuse of an existing MySQL binlog connection, or creates a new one if none exists. The connection is defined by `user:pass@host:port`.

Default value: 0

**Example**

```sql
-- create MaterializedMySQL databases that read the events from the binlog client
CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1
CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
CREATE DATABASE db3 ENGINE = MaterializedMySQL('host:port', 'db3', 'user2', 'password2') SETTINGS use_binlog_client=1
```

Databases `db1` and `db2` will use the same binlog connection, since they use the same `user:pass@host:port`. Database `db3` will use a separate binlog connection.

## max_bytes_in_binlog_queue

Defines the limit in bytes for the binlog events queue. If the number of bytes in the queue exceeds this limit, reading of new events from MySQL stops until space for new events is freed. This introduces a memory limit. A very high value could consume all available memory; a very low value could make the databases wait for new events.

Default value: 67108864

**Example**

```sql
CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=33554432
CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
```

If database `db1` is unable to consume binlog events fast enough and the size of the events queue exceeds `33554432` bytes, reading of new events from MySQL is postponed until `db1` consumes the events and releases some space.

NOTE: This also impacts `db2`, which will be waiting for new events too, since they share the same connection.

## max_milliseconds_to_wait_in_binlog_queue

Defines the maximum number of milliseconds to wait when `max_bytes_in_binlog_queue` is exceeded. After that, the database is detached from the current binlog connection and a new one is established, to prevent other databases from waiting for this database.

Default value: 10000

**Example**

```sql
CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=33554432, max_milliseconds_to_wait_in_binlog_queue=1000
CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
```

If the event queue of database `db1` is full, the binlog connection waits for `1000` ms, and if the database is still not able to consume the events, it is detached from the connection so that another one can be created.

NOTE: If the database `db1` has been detached from the shared connection and a new one was created, then once the binlog connections for `db1` and `db2` reach the same position they are merged into one, and `db1` and `db2` will use the same connection again.

## max_bytes_in_binlog_dispatcher_buffer

Defines the maximum number of bytes in the binlog dispatcher's buffer before it is flushed to the attached binlogs. The events from the MySQL binlog connection are buffered before being sent to the attached databases. It increases the event throughput from the binlog to the databases.

Default value: 1048576

## max_flush_milliseconds_in_binlog_dispatcher

Defines the maximum number of milliseconds the binlog dispatcher's buffer waits before it is flushed to the attached binlogs. If no events are received from the MySQL binlog connection for a while, the buffered events are sent to the attached databases after this time.

Default value: 1000
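A minimal sketch combining the two dispatcher settings with the `CREATE DATABASE` pattern used in the earlier examples; the host, schema name, and credentials below are placeholders, and the values shown are simply the defaults:

```sql
-- placeholders for host, schema, and credentials; dispatcher settings shown at their default values
CREATE DATABASE db_dispatcher ENGINE = MaterializedMySQL('host:port', 'db', 'user', 'password')
SETTINGS use_binlog_client = 1,
         max_bytes_in_binlog_dispatcher_buffer = 1048576,
         max_flush_milliseconds_in_binlog_dispatcher = 1000
```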
# Design

## The Binlog Events Dispatcher

Currently each MaterializedMySQL database opens its own connection to MySQL to subscribe to binlog events. There is a need to have only one connection and _dispatch_ the binlog events to all databases that replicate from the same MySQL instance.

## Each MaterializedMySQL Database Has Its Own Event Queue

To prevent slowing down other instances there should be an _event queue_ per MaterializedMySQL database to handle the events independently of the speed of other instances. The dispatcher reads an event from the binlog and sends it to every MaterializedMySQL database that needs it. Each database handles its events in separate threads.

## Catching up

If several databases have the same binlog position, they can use the same dispatcher. If a newly created database (or one that has been detached for some time) requests events that have already been processed, we need to create another communication _channel_ to the binlog. We do this by creating another temporary dispatcher for such databases. When the new dispatcher _catches up with_ the old one, the new/temporary dispatcher is not needed anymore and all databases getting events from this dispatcher can be moved to the old one.

## Memory Limit

There is a _memory limit_ to control event queue memory consumption per MySQL Client. If a database is not able to handle events fast enough, and the event queue is getting full, we have the following options:

1. The dispatcher is blocked until the slowest database frees up space for new events. All other databases are waiting for the slowest one. (Preferred)
2. The dispatcher is _never_ blocked, but suspends incremental sync for the slow database and continues dispatching events to the remaining databases.

## Performance

A lot of CPU can be saved by not processing every event in every database. The binlog contains events for all databases, and it is wasteful to distribute row events to a database that will not process them, especially if there are a lot of databases. This requires some sort of per-database binlog filtering and buffering.

Currently all events are sent to all MaterializedMySQL databases, but parsing the event, which consumes CPU, is up to each database.

# Detailed Design

1. If a client (e.g. database) wants to read a stream of the events from MySQL binlog, it creates a connection to the remote binlog by host/user/password and _executed GTID set_ params.
2. If another client wants to read the events from the binlog but for a different _executed GTID set_, it is **not** possible to reuse the existing connection to MySQL, so another connection to the same remote binlog must be created. (_This is how it is implemented today_).
3. When these 2 connections reach the same binlog position, they read the same events. It is logical to drop the duplicate connection and move all its users over. Now one connection dispatches binlog events to several clients. Obviously, only connections to the same binlog should be merged.

## Classes

1. One connection can send (or dispatch) events to several clients and might be called `BinlogEventsDispatcher`.
2. Several dispatchers are grouped by _user:password@host:port_ in a `BinlogClient`, since they point to the same binlog.
3. The clients should communicate only with the public API of `BinlogClient`. The result of using `BinlogClient` is an object that implements `IBinlog` to read events from. This implementation of `IBinlog` must be compatible with the old implementation `MySQLFlavor`: when replacing the old implementation with the new one, the behavior must not change.

## SQL

```sql
-- create MaterializedMySQL databases that read the events from the binlog client
CREATE DATABASE db1_client1 ENGINE = MaterializedMySQL('host:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=1024;
CREATE DATABASE db2_client1 ENGINE = MaterializedMySQL('host:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db3_client1 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db4_client2 ENGINE = MaterializedMySQL('host2:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db5_client3 ENGINE = MaterializedMySQL('host:port', 'db', 'user1', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db6_old ENGINE = MaterializedMySQL('host:port', 'db', 'user1', 'password') SETTINGS use_binlog_client=0;
```

Databases `db1_client1`, `db2_client1` and `db3_client1` share one instance of `BinlogClient` since they have the same params. `BinlogClient` will create 3 connections to the MySQL server and thus 3 instances of `BinlogEventsDispatcher`, but if these connections have the same binlog position, they should be merged into one connection, meaning all clients are moved to one dispatcher and the others are closed. Databases `db4_client2` and `db5_client3` would use 2 different independent `BinlogClient` instances. Database `db6_old` will use the old implementation. NOTE: By default `use_binlog_client` is disabled. Setting `max_bytes_in_binlog_queue` defines the maximum allowed bytes in the binlog queue; by default it is `1073741824` bytes. If the number of bytes exceeds this limit, dispatching is stopped until space is freed for new events.

## Binlog Table Structure

To see the status of all `BinlogClient` instances, there is the `system.mysql_binlogs` system table. It shows the list of all created and _alive_ `IBinlog` instances with information about their `BinlogEventsDispatcher` and `BinlogClient`.

Example:

```
SELECT * FROM system.mysql_binlogs FORMAT Vertical
Row 1:
──────
binlog_client_name:                         root@127.0.0.1:3306
name:                                       test_Clickhouse1
mysql_binlog_name:                          binlog.001154
mysql_binlog_pos:                           7142294
mysql_binlog_timestamp:                     1660082447
mysql_binlog_executed_gtid_set:             a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30523304
dispatcher_name:                            Applier
dispatcher_mysql_binlog_name:               binlog.001154
dispatcher_mysql_binlog_pos:                7142294
dispatcher_mysql_binlog_timestamp:          1660082447
dispatcher_mysql_binlog_executed_gtid_set:  a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30523304
size:                                       0
bytes:                                      0
max_bytes:                                  0
```

### Tests

Unit tests:

```
$ ./unit_tests_dbms --gtest_filter=MySQLBinlog.*
```

Integration tests:

```
$ pytest -s -vv test_materialized_mysql_database/test.py::test_binlog_client
```

Dumps events from the file:

```
$ ./utils/check-mysql-binlog/check-mysql-binlog --binlog binlog.001392
```

Dumps events from the server:

```
$ ./utils/check-mysql-binlog/check-mysql-binlog --host 127.0.0.1 --port 3306 --user root --password pass --gtid a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30462856
```
@@ -4773,6 +4773,45 @@ Type: Int64

 Default: 0

+## enable_deflate_qpl_codec {#enable_deflate_qpl_codec}
+
+If turned on, the DEFLATE_QPL codec may be used to compress columns.
+
+Possible values:
+
+- 0 - Disabled
+- 1 - Enabled
+
+Type: Bool
+
+## enable_zstd_qat_codec {#enable_zstd_qat_codec}
+
+If turned on, the ZSTD_QAT codec may be used to compress columns.
+
+Possible values:
+
+- 0 - Disabled
+- 1 - Enabled
+
+Type: Bool
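A hedged sketch of how these two settings are enabled before declaring columns with the corresponding codecs; the table below is hypothetical and assumes a build and host with the required QPL/QAT support:

```sql
-- both codecs are disabled by default
SET enable_deflate_qpl_codec = 1;
SET enable_zstd_qat_codec = 1;

-- hypothetical table using the hardware-assisted codecs
CREATE TABLE codec_example
(
    id UInt64,
    payload String CODEC(ZSTD_QAT(6)),
    log_line String CODEC(DEFLATE_QPL)
)
ENGINE = MergeTree
ORDER BY id;
```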
+## output_format_compression_level
+
+Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when writing to table functions `file`, `url`, `hdfs`, `s3`, or `azureBlobStorage`.
+
+Possible values: from `1` to `22`
+
+Default: `3`
+
+## output_format_compression_zstd_window_log
+
+Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression. This can help to achieve a better compression ratio.
+
+Possible values: non-negative numbers. Note that if the value is too small or too big, `zstdlib` will throw an exception. Typical values are from `20` (window size = `1MB`) to `30` (window size = `1GB`).
+
+Default: `0`
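A minimal sketch combining the two output-compression settings; the output file name is a placeholder, and the compression method is assumed to be inferred from the `.zst` extension:

```sql
-- write zstd-compressed output with a higher level and long-range matching
SELECT number
FROM system.numbers
LIMIT 1000000
INTO OUTFILE 'numbers.tsv.zst'
SETTINGS output_format_compression_level = 10,
         output_format_compression_zstd_window_log = 27;
```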
 ## rewrite_count_distinct_if_with_count_distinct_implementation

 Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#count_distinct_implementation) setting.
@@ -11,6 +11,8 @@ Keys:
 - `--query` — Format queries of any length and complexity.
 - `--hilite` — Add syntax highlight with ANSI terminal escape sequences.
 - `--oneline` — Format in single line.
+- `--max_line_length` — Format in single line queries with length less than specified.
+- `--comments` — Keep comments in the output.
 - `--quiet` or `-q` — Just check syntax, no output on success.
 - `--multiquery` or `-n` — Allow multiple queries in the same file.
 - `--obfuscate` — Obfuscate instead of formatting.
@@ -24,7 +24,7 @@ A client application to interact with clickhouse-keeper by its native protocol.
 ## Example {#clickhouse-keeper-client-example}

 ```bash
-./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
+./clickhouse-keeper-client -h localhost -p 9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
 Connected to ZooKeeper at [::1]:9181 with session_id 137
 / :) ls
 keeper foo bar
@@ -1605,6 +1605,78 @@ Result:

 Alias: levenshteinDistance

+## damerauLevenshteinDistance
+
+Calculates the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) between two byte strings.
+
+**Syntax**
+
+```sql
+damerauLevenshteinDistance(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT damerauLevenshteinDistance('clickhouse', 'mouse');
+```
+
+Result:
+
+``` text
+┌─damerauLevenshteinDistance('clickhouse', 'mouse')─┐
+│                                                  6 │
+└────────────────────────────────────────────────────┘
+```
+
+## jaroSimilarity
+
+Calculates the [Jaro similarity](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro_similarity) between two byte strings.
+
+**Syntax**
+
+```sql
+jaroSimilarity(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT jaroSimilarity('clickhouse', 'click');
+```
+
+Result:
+
+``` text
+┌─jaroSimilarity('clickhouse', 'click')─┐
+│                    0.8333333333333333 │
+└────────────────────────────────────────┘
+```
+
+## jaroWinklerSimilarity
+
+Calculates the [Jaro-Winkler similarity](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro%E2%80%93Winkler_similarity) between two byte strings.
+
+**Syntax**
+
+```sql
+jaroWinklerSimilarity(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT jaroWinklerSimilarity('clickhouse', 'click');
+```
+
+Result:
+
+``` text
+┌─jaroWinklerSimilarity('clickhouse', 'click')─┐
+│                           0.8999999999999999 │
+└───────────────────────────────────────────────┘
+```
+
 ## initcap

 Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
@@ -372,15 +372,23 @@ ClickHouse supports general purpose codecs and specialized codecs.

 #### ZSTD

-`ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: \[1, 22\]. Default value: 1.
+`ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: \[1, 22\]. Default level: 1.

 High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage.

+#### ZSTD_QAT
+
+`ZSTD_QAT[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable level, implemented by [Intel® QATlib](https://github.com/intel/qatlib) and [Intel® QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Possible levels: \[1, 12\]. Default level: 1. Recommended level range: \[6, 12\]. Some limitations apply:
+
+- ZSTD_QAT is disabled by default and can only be used after enabling configuration setting [enable_zstd_qat_codec](../../../operations/settings/settings.md#enable_zstd_qat_codec).
+- For compression, ZSTD_QAT tries to use an Intel® QAT offloading device ([QuickAssist Technology](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)). If no such device was found, it falls back to ZSTD compression in software.
+- Decompression is always performed in software.
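A hedged usage sketch for the ZSTD_QAT codec described above; the table and column names are made up, and `enable_zstd_qat_codec` is assumed to be enabled:

```sql
-- switch an existing column to the QAT-assisted codec; level 6 is within the recommended range [6, 12]
ALTER TABLE events MODIFY COLUMN body String CODEC(ZSTD_QAT(6));

-- check which codec the column now uses
SELECT name, compression_codec
FROM system.columns
WHERE table = 'events' AND name = 'body';
```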
 #### DEFLATE_QPL

 `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:

-- DEFLATE_QPL is disabled by default and can only be used after setting configuration parameter `enable_deflate_qpl_codec = 1`.
+- DEFLATE_QPL is disabled by default and can only be used after enabling configuration setting [enable_deflate_qpl_codec](../../../operations/settings/settings.md#enable_deflate_qpl_codec).
 - DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details.
 - DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details.
 - DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled.
@@ -3,6 +3,7 @@
 #include <string_view>
 #include <boost/program_options.hpp>

+#include <IO/copyData.h>
 #include <IO/ReadBufferFromFileDescriptor.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromFileDescriptor.h>

@@ -14,6 +15,7 @@
 #include <Parsers/obfuscateQueries.h>
 #include <Parsers/parseQuery.h>
 #include <Common/ErrorCodes.h>
+#include <Common/StringUtils/StringUtils.h>
 #include <Common/TerminalSize.h>

 #include <Interpreters/Context.h>
@@ -30,22 +32,49 @@
 #include <DataTypes/DataTypeFactory.h>
 #include <Formats/FormatFactory.h>
 #include <Formats/registerFormats.h>
+#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
+
+
+namespace DB::ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+namespace
+{
+
+void skipSpacesAndComments(const char*& pos, const char* end, bool print_comments)
+{
+    do
+    {
+        /// skip spaces to avoid throw exception after last query
+        while (pos != end && std::isspace(*pos))
+            ++pos;
+
+        const char * comment_begin = pos;
+        /// for skip comment after the last query and to not throw exception
+        if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-')
+        {
+            pos += 2;
+            /// skip until the end of the line
+            while (pos != end && *pos != '\n')
+                ++pos;
+            if (print_comments)
+                std::cout << std::string_view(comment_begin, pos - comment_begin) << "\n";
+        }
+        /// need to parse next sql
+        else
+            break;
+    } while (pos != end);
+}
+
+}
+
 #pragma GCC diagnostic ignored "-Wunused-function"
 #pragma GCC diagnostic ignored "-Wmissing-declarations"

 extern const char * auto_time_zones[];

-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int INVALID_FORMAT_INSERT_QUERY_WITH_DATA;
-}
-}
-
 int mainEntryClickHouseFormat(int argc, char ** argv)
 {
     using namespace DB;
@@ -56,8 +85,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
     desc.add_options()
         ("query", po::value<std::string>(), "query to format")
         ("help,h", "produce help message")
+        ("comments", "keep comments in the output")
         ("hilite", "add syntax highlight with ANSI terminal escape sequences")
         ("oneline", "format in single line")
+        ("max_line_length", po::value<size_t>()->default_value(0), "format in single line queries with length less than specified")
         ("quiet,q", "just check syntax, no output on success")
         ("multiquery,n", "allow multiple queries in the same file")
         ("obfuscate", "obfuscate instead of formatting")
@ -89,6 +120,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
|||||||
bool oneline = options.count("oneline");
|
bool oneline = options.count("oneline");
|
||||||
bool quiet = options.count("quiet");
|
bool quiet = options.count("quiet");
|
||||||
bool multiple = options.count("multiquery");
|
bool multiple = options.count("multiquery");
|
||||||
|
bool print_comments = options.count("comments");
|
||||||
|
size_t max_line_length = options["max_line_length"].as<size_t>();
|
||||||
bool obfuscate = options.count("obfuscate");
|
bool obfuscate = options.count("obfuscate");
|
||||||
bool backslash = options.count("backslash");
|
bool backslash = options.count("backslash");
|
||||||
bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert");
|
bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert");
|
||||||
@ -105,6 +138,19 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
|||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (oneline && max_line_length)
|
||||||
|
{
|
||||||
|
std::cerr << "Options 'oneline' and 'max_line_length' are mutually exclusive." << std::endl;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (max_line_length > 255)
|
||||||
|
{
|
||||||
|
std::cerr << "Option 'max_line_length' must be less than 256." << std::endl;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
String query;
|
String query;
|
||||||
|
|
||||||
if (options.count("query"))
|
if (options.count("query"))
|
||||||
@ -125,7 +171,6 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
|||||||
|
|
||||||
if (options.count("seed"))
|
if (options.count("seed"))
|
||||||
{
|
{
|
||||||
std::string seed;
|
|
||||||
hash_func.update(options["seed"].as<std::string>());
|
hash_func.update(options["seed"].as<std::string>());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,30 +226,75 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
const char * pos = query.data();
|
const char * pos = query.data();
|
||||||
const char * end = pos + query.size();
|
const char * end = pos + query.size();
|
||||||
|
skipSpacesAndComments(pos, end, print_comments);
|
||||||
|
|
||||||
ParserQuery parser(end, allow_settings_after_format_in_insert);
|
ParserQuery parser(end, allow_settings_after_format_in_insert);
|
||||||
do
|
while (pos != end)
|
||||||
{
|
{
|
||||||
|
size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos;
|
||||||
|
|
||||||
ASTPtr res = parseQueryAndMovePosition(
|
ASTPtr res = parseQueryAndMovePosition(
|
||||||
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
|
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
|
||||||
|
|
||||||
/// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure,
|
std::unique_ptr<ReadBuffer> insert_query_payload = nullptr;
|
||||||
/// we should throw an exception early, and make exception message more readable.
|
/// If the query is INSERT ... VALUES, then we will try to parse the data.
|
||||||
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
|
if (auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
|
||||||
{
|
{
|
||||||
throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA,
|
if ("Values" != insert_query->format)
|
||||||
"Can't format ASTInsertQuery with data, since data will be lost");
|
throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Can't format INSERT query with data format '{}'", insert_query->format);
|
||||||
|
|
||||||
|
/// Reset format to default to have `INSERT INTO table VALUES` instead of `INSERT INTO table VALUES FORMAT Values`
|
||||||
|
insert_query->format = {};
|
||||||
|
|
||||||
|
/// We assume that data ends with a newline character (same as client does)
|
||||||
|
const char * this_query_end = find_first_symbols<'\n'>(insert_query->data, end);
|
||||||
|
insert_query->end = this_query_end;
|
||||||
|
pos = this_query_end;
|
||||||
|
insert_query_payload = getReadBufferFromASTInsertQuery(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!quiet)
|
if (!quiet)
|
||||||
{
|
{
|
||||||
if (!backslash)
|
if (!backslash)
|
||||||
{
|
{
|
||||||
WriteBufferFromOStream res_buf(std::cout, 4096);
|
WriteBufferFromOwnString str_buf;
|
||||||
formatAST(*res, res_buf, hilite, oneline);
|
formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length);
|
||||||
res_buf.finalize();
|
|
||||||
if (multiple)
|
if (insert_query_payload)
|
||||||
std::cout << "\n;\n";
|
{
|
||||||
|
str_buf.write(' ');
|
||||||
|
copyData(*insert_query_payload, str_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
String res_string = str_buf.str();
|
||||||
|
const char * s_pos = res_string.data();
|
||||||
|
const char * s_end = s_pos + res_string.size();
|
||||||
|
/// remove trailing spaces
|
||||||
|
while (s_end > s_pos && isWhitespaceASCIIOneLine(*(s_end - 1)))
|
||||||
|
--s_end;
|
||||||
|
WriteBufferFromOStream res_cout(std::cout, 4096);
|
||||||
|
/// For multiline queries we print ';' at new line,
|
||||||
|
/// but for single line queries we print ';' at the same line
|
||||||
|
bool has_multiple_lines = false;
|
||||||
|
while (s_pos != s_end)
|
||||||
|
{
|
||||||
|
if (*s_pos == '\n')
|
||||||
|
has_multiple_lines = true;
|
||||||
|
res_cout.write(*s_pos++);
|
||||||
|
}
|
||||||
|
res_cout.finalize();
|
||||||
|
|
||||||
|
if (multiple && !insert_query_payload)
|
||||||
|
{
|
||||||
|
if (oneline || !has_multiple_lines)
|
||||||
|
std::cout << ";\n";
|
||||||
|
else
|
||||||
|
std::cout << "\n;\n";
|
||||||
|
}
|
||||||
|
else if (multiple && insert_query_payload)
|
||||||
|
/// Do not need to add ; because it's already in the insert_query_payload
|
||||||
|
std::cout << "\n";
|
||||||
|
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
/// add additional '\' at the end of each line;
|
/// add additional '\' at the end of each line;
|
||||||
@ -232,27 +322,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
|||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
skipSpacesAndComments(pos, end, print_comments);
|
||||||
do
|
if (!multiple)
|
||||||
{
|
break;
|
||||||
/// skip spaces to avoid throw exception after last query
|
}
|
||||||
while (pos != end && std::isspace(*pos))
|
|
||||||
++pos;
|
|
||||||
|
|
||||||
/// for skip comment after the last query and to not throw exception
|
|
||||||
if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-')
|
|
||||||
{
|
|
||||||
pos += 2;
|
|
||||||
/// skip until the end of the line
|
|
||||||
while (pos != end && *pos != '\n')
|
|
||||||
++pos;
|
|
||||||
}
|
|
||||||
/// need to parse next sql
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
} while (pos != end);
|
|
||||||
|
|
||||||
} while (multiple && pos != end);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (...)
|
catch (...)
|
||||||
@@ -551,13 +551,18 @@ endif ()
 target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)

 if (TARGET ch_contrib::qpl)
     dbms_target_link_libraries(PUBLIC ch_contrib::qpl)
 endif ()

 if (TARGET ch_contrib::accel-config)
     dbms_target_link_libraries(PUBLIC ch_contrib::accel-config)
 endif ()

+if (TARGET ch_contrib::qatzstd_plugin)
+    dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin)
+    target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin)
+endif ()
+
 target_link_libraries(clickhouse_common_io PUBLIC boost::context)
 dbms_target_link_libraries(PUBLIC boost::context)
@@ -651,7 +651,13 @@ void Connection::sendQuery(
             if (method == "ZSTD")
                 level = settings->network_zstd_compression_level;

-            CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_deflate_qpl_codec);
+            CompressionCodecFactory::instance().validateCodec(
+                method,
+                level,
+                !settings->allow_suspicious_codecs,
+                settings->allow_experimental_codecs,
+                settings->enable_deflate_qpl_codec,
+                settings->enable_zstd_qat_codec);
             compression_codec = CompressionCodecFactory::instance().get(method, level);
         }
         else
@@ -43,6 +43,19 @@ void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, Atomic
     }
 }

+void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel)
+{
+    cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
+        "Load job '{}' -> {}",
+        self->name,
+        getExceptionMessage(dependency->exception(), /* with_stacktrace = */ false)));
+}
+
+void ignoreDependencyFailure(const LoadJobPtr &, const LoadJobPtr &, std::exception_ptr &)
+{
+    // No-op
+}
+
 LoadStatus LoadJob::status() const
 {
     std::unique_lock lock{mutex};
@@ -96,7 +109,10 @@ size_t LoadJob::canceled(const std::exception_ptr & ptr)

 size_t LoadJob::finish()
 {
-    func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` return
+    // To ensure functions are destructed before `AsyncLoader::wait()` return
+    func = {};
+    dependency_failure = {};
+
     finish_time = std::chrono::system_clock::now();
     if (waiters > 0)
         finished.notify_all();
@@ -327,17 +343,19 @@ void AsyncLoader::schedule(const LoadJobSet & jobs_to_schedule)

         if (dep_status == LoadStatus::FAILED || dep_status == LoadStatus::CANCELED)
         {
-            // Dependency on already failed or canceled job -- it's okay. Cancel all dependent jobs.
-            std::exception_ptr e;
+            // Dependency on already failed or canceled job -- it's okay.
+            // Process as usual (may lead to cancel of all dependent jobs).
+            std::exception_ptr cancel;
             NOEXCEPT_SCOPE({
                 ALLOW_ALLOCATIONS_IN_SCOPE;
-                e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
-                    "Load job '{}' -> {}",
-                    job->name,
-                    getExceptionMessage(dep->exception(), /* with_stacktrace = */ false)));
+                if (job->dependency_failure)
+                    job->dependency_failure(job, dep, cancel);
             });
-            finish(job, LoadStatus::CANCELED, e, lock);
-            break; // This job is now finished, stop its dependencies processing
+            if (cancel)
+            {
+                finish(job, LoadStatus::CANCELED, cancel, lock);
+                break; // This job is now finished, stop its dependencies processing
+            }
         }
     }
 }
@@ -515,63 +533,76 @@ String AsyncLoader::checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJo
     return {};
 }

-void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock)
+void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr reason, std::unique_lock<std::mutex> & lock)
 {
     chassert(scheduled_jobs.contains(job)); // Job was pending

+    // Notify waiters
     size_t resumed_workers = 0; // Number of workers resumed in the execution pool of the job
     if (status == LoadStatus::OK)
-    {
-        // Notify waiters
-        resumed_workers += job->ok();
-
-        // Update dependent jobs and enqueue if ready
-        for (const auto & dep : scheduled_jobs[job].dependent_jobs)
-        {
-            chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending
-            Info & dep_info = scheduled_jobs[dep];
-            dep_info.dependencies_left--;
-            if (!dep_info.isBlocked())
-                enqueue(dep_info, dep, lock);
-        }
-    }
-    else
-    {
-        // Notify waiters
-        if (status == LoadStatus::FAILED)
-            resumed_workers += job->failed(exception_from_job);
-        else if (status == LoadStatus::CANCELED)
-            resumed_workers += job->canceled(exception_from_job);
-
-        Info & info = scheduled_jobs[job];
-        if (info.isReady())
-        {
-            pools[job->pool_id].ready_queue.erase(info.ready_seqno);
-            info.ready_seqno = 0;
-        }
-
-        // Recurse into all dependent jobs
-        LoadJobSet dependent;
-        dependent.swap(info.dependent_jobs); // To avoid container modification during recursion
-        for (const auto & dep : dependent)
-        {
-            if (!scheduled_jobs.contains(dep))
-                continue; // Job has already been canceled
-            std::exception_ptr e;
-            NOEXCEPT_SCOPE({
-                ALLOW_ALLOCATIONS_IN_SCOPE;
-                e = std::make_exception_ptr(
-                    Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
-                        "Load job '{}' -> {}",
-                        dep->name,
-                        getExceptionMessage(exception_from_job, /* with_stacktrace = */ false)));
-            });
-            finish(dep, LoadStatus::CANCELED, e, lock);
-        }
-
-        // Clean dependency graph edges pointing to canceled jobs
-        for (const auto & dep : job->dependencies)
-            if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end())
-                dep_info->second.dependent_jobs.erase(job);
-    }
+        resumed_workers = job->ok();
+    else if (status == LoadStatus::FAILED)
+        resumed_workers = job->failed(reason);
+    else if (status == LoadStatus::CANCELED)
+        resumed_workers = job->canceled(reason);
+
+    // Adjust suspended workers count
+    if (resumed_workers)
+    {
+        Pool & pool = pools[job->executionPool()];
+        pool.suspended_workers -= resumed_workers;
+    }
+
+    Info & info = scheduled_jobs[job];
+    if (info.isReady())
+    {
+        // Job could be in ready queue (on cancel) -- must be dequeued
+        pools[job->pool_id].ready_queue.erase(info.ready_seqno);
+        info.ready_seqno = 0;
+    }
+
+    // To avoid container modification during recursion (during clean dependency graph edges below)
+    LoadJobSet dependent;
+    dependent.swap(info.dependent_jobs);
+
+    // Update dependent jobs
+    for (const auto & dpt : dependent)
+    {
+        if (auto dpt_info = scheduled_jobs.find(dpt); dpt_info != scheduled_jobs.end())
+        {
+            dpt_info->second.dependencies_left--;
+            if (!dpt_info->second.isBlocked())
+                enqueue(dpt_info->second, dpt, lock);
+
+            if (status != LoadStatus::OK)
+            {
+                std::exception_ptr cancel;
+                NOEXCEPT_SCOPE({
+                    ALLOW_ALLOCATIONS_IN_SCOPE;
+                    if (dpt->dependency_failure)
+                        dpt->dependency_failure(dpt, job, cancel);
+                });
+                // Recurse into dependent job if it should be canceled
+                if (cancel)
+                    finish(dpt, LoadStatus::CANCELED, cancel, lock);
+            }
+        }
+        else
+        {
+            // Job has already been canceled. Do not enter twice into the same job during finish recursion.
+            // This happens in {A<-B; A<-C; B<-D; C<-D} graph for D if A is failed or canceled.
+            chassert(status == LoadStatus::CANCELED);
+        }
+    }
+
+    // Clean dependency graph edges pointing to canceled jobs
+    if (status != LoadStatus::OK)
+    {
+        for (const auto & dep : job->dependencies)
+        {
+            if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end())
+                dep_info->second.dependent_jobs.erase(job);
+        }
+    }

     // Job became finished
@@ -582,12 +613,6 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
         if (log_progress)
             logAboutProgress(log, finished_jobs.size() - old_jobs, finished_jobs.size() + scheduled_jobs.size() - old_jobs, stopwatch);
     });
-
-    if (resumed_workers)
-    {
-        Pool & pool = pools[job->executionPool()];
-        pool.suspended_workers -= resumed_workers;
-    }
 }

 void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock)
@@ -612,6 +637,9 @@ void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::un
     }

     job->pool_id.store(new_pool_id);
+    // TODO(serxa): we should adjust suspended_workers and suspended_waiters here.
+    // Otherwise suspended_workers we be left inconsistent. Fix it and add a test.
+    // Scenario: schedule a job A, wait for it from a job B in the same pool, prioritize A

     // Recurse into dependencies
     for (const auto & dep : job->dependencies)
@@ -1,6 +1,7 @@
 #pragma once

 #include <condition_variable>
+#include <concepts>
 #include <exception>
 #include <memory>
 #include <map>
@@ -57,12 +58,13 @@ enum class LoadStatus
 class LoadJob : private boost::noncopyable
 {
 public:
-    template <class Func, class LoadJobSetType>
-    LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, Func && func_)
+    template <class LoadJobSetType, class Func, class DFFunc>
+    LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, DFFunc && dependency_failure_, Func && func_)
         : dependencies(std::forward<LoadJobSetType>(dependencies_))
         , name(std::move(name_))
         , execution_pool_id(pool_id_)
         , pool_id(pool_id_)
+        , dependency_failure(std::forward<DFFunc>(dependency_failure_))
         , func(std::forward<Func>(func_))
     {}

@@ -108,6 +110,14 @@ private:
     std::atomic<UInt64> job_id{0};
     std::atomic<size_t> execution_pool_id;
     std::atomic<size_t> pool_id;

+    // Handler for failed or canceled dependencies.
+    // If job needs to be canceled on `dependency` failure, then function should set `cancel` to a specific reason.
+    // Note that implementation should be fast and cannot use AsyncLoader, because it is called under `AsyncLoader::mutex`.
+    // Note that `dependency_failure` is called only on pending jobs.
+    std::function<void(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel)> dependency_failure;
+
+    // Function to be called to execute the job.
     std::function<void(AsyncLoader & loader, const LoadJobPtr & self)> func;

     mutable std::mutex mutex;
@@ -123,35 +133,54 @@ private:
     std::atomic<TimePoint> finish_time{TimePoint{}};
 };

-struct EmptyJobFunc
-{
-    void operator()(AsyncLoader &, const LoadJobPtr &) {}
-};
-
-template <class Func = EmptyJobFunc>
-LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, Func && func = EmptyJobFunc())
-{
-    return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), 0, std::forward<Func>(func));
-}
-
-template <class Func = EmptyJobFunc>
-LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, Func && func = EmptyJobFunc())
-{
-    return std::make_shared<LoadJob>(dependencies, std::move(name), 0, std::forward<Func>(func));
-}
-
-template <class Func = EmptyJobFunc>
-LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc())
-{
-    return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), pool_id, std::forward<Func>(func));
-}
-
-template <class Func = EmptyJobFunc>
-LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc())
-{
-    return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<Func>(func));
-}
+// For LoadJob::dependency_failure. Cancels the job on the first dependency failure or cancel.
+void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel);
+
+// For LoadJob::dependency_failure. Never cancels the job due to dependency failure or cancel.
+void ignoreDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel);
+
+template <class F> concept LoadJobDependencyFailure = std::invocable<F, const LoadJobPtr &, const LoadJobPtr &, std::exception_ptr &>;
+template <class F> concept LoadJobFunc = std::invocable<F, AsyncLoader &, const LoadJobPtr &>;
+
+LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), 0, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
+}
+
+LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(dependencies, std::move(name), 0, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
+}
+
+LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), pool_id, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
+}
+
+LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<decltype(dependency_failure)>(dependency_failure), std::forward<decltype(func)>(func));
+}
+
+LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), 0, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
+}
+
+LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(dependencies, std::move(name), 0, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
+}
+
+LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), pool_id, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
+}
+
+LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobFunc auto && func)
+{
+    return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, cancelOnDependencyFailure, std::forward<decltype(func)>(func));
+}

 // Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set).
 class LoadTask : private boost::noncopyable
@@ -277,7 +306,7 @@ private:
     {
         size_t dependencies_left = 0; // Current number of dependencies on pending jobs.
         UInt64 ready_seqno = 0; // Zero means that job is not in ready queue.
-        LoadJobSet dependent_jobs; // Set of jobs dependent on this job.
+        LoadJobSet dependent_jobs; // Set of jobs dependent on this job. Contains only scheduled jobs.

         // Three independent states of a scheduled job.
         bool isBlocked() const { return dependencies_left > 0; }
@@ -371,7 +400,7 @@ public:
 private:
     void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
     String checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock);
-    void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock);
+    void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr reason, std::unique_lock<std::mutex> & lock);
     void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
     void prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock);
     void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock);
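A minimal sketch of how the new hooks compose, based on the `makeLoadJob` overloads above; the job names and lambda bodies are illustrative only, and `schedule()`/`start()` are assumed to be called as in the unit tests further below:

```cpp
// Default policy: dependents of a failed job are canceled (cancelOnDependencyFailure).
auto base = makeLoadJob({}, "base", [] (AsyncLoader &, const LoadJobPtr &)
{
    /// may throw
});

// Explicitly opt out: run this job even if `base` failed or was canceled.
auto tolerant = makeLoadJob({base}, "tolerant", ignoreDependencyFailure,
    [] (AsyncLoader &, const LoadJobPtr &) { /* ... */ });

// Or supply a custom policy that sets `cancel` only under some condition.
auto custom = makeLoadJob({base}, "custom",
    [] (const LoadJobPtr & self, const LoadJobPtr & /*dependency*/, std::exception_ptr & cancel)
    {
        cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
            "Load job '{}' canceled", self->name));
    },
    [] (AsyncLoader &, const LoadJobPtr &) { /* ... */ });
```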
@@ -242,7 +242,7 @@
     M(FilesystemCacheDelayedCleanupElements, "Filesystem cache elements in background cleanup queue") \
     M(FilesystemCacheHoldFileSegments, "Filesystem cache file segment which are currently hold as unreleasable") \
     M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \
-    M(S3Requests, "S3 requests") \
+    M(S3Requests, "S3 requests count") \
     M(KeeperAliveConnections, "Number of alive connections") \
     M(KeeperOutstandingRequets, "Number of outstanding requests") \
     M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \
src/Common/MatchGenerator.cpp (new file, 494 lines)
@@ -0,0 +1,494 @@
#ifdef __clang__
#  pragma clang diagnostic push
#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#  pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
#  pragma clang diagnostic ignored "-Wnested-anon-types"
#  pragma clang diagnostic ignored "-Wunused-parameter"
#  pragma clang diagnostic ignored "-Wshadow-field-in-constructor"
#  pragma clang diagnostic ignored "-Wdtor-name"
#endif
#include <re2/re2.h>
#include <re2/regexp.h>
#include <re2/walker-inl.h>
#ifdef __clang__
#  pragma clang diagnostic pop
#endif

#ifdef LOG_INFO
#undef LOG_INFO
#undef LOG_WARNING
#undef LOG_ERROR
#undef LOG_FATAL
#endif

#include "MatchGenerator.h"

#include <Common/Exception.h>
#include <Common/thread_local_rng.h>
#include <map>
#include <functional>
#include <magic_enum.hpp>

namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
}
}

namespace re2
{

class RandomStringPrepareWalker : public Regexp::Walker<Regexp *>
{
private:
    static constexpr int ImplicitMax = 100;

    using Children = std::vector<Regexp *>;

    class Generators;

    /// This function objects look much prettier than lambda expression when stack traces are printed
    class NodeFunction
    {
    public:
        virtual size_t operator() (char * out, size_t size) = 0;
        virtual size_t getRequiredSize() = 0;
        virtual ~NodeFunction() = default;
    };

    using NodeFunctionPtr = std::shared_ptr<NodeFunction>;
    using NodeFuncs = std::vector<NodeFunctionPtr>;

    static NodeFuncs getFuncs(const Children & children_, const Generators & generators_)
    {
        NodeFuncs result;
        result.reserve(children_.size());
        for (auto * child: children_)
            result.push_back(generators_.at(child));
        return result;
    }

    class Generators: public std::map<re2::Regexp *, NodeFunctionPtr> {};

    class RegexpConcatFunction : public NodeFunction
    {
    public:
        RegexpConcatFunction(const Children & children_, const Generators & generators_)
            : children(getFuncs(children_, generators_))
        {
        }

        size_t operator () (char * out, size_t size) override
        {
            size_t total_size = 0;
            for (auto & child: children)
            {
                size_t consumed = child->operator()(out, size);
                chassert(consumed <= size);
                out += consumed;
                size -= consumed;
                total_size += consumed;
            }
            return total_size;
        }

        size_t getRequiredSize() override
        {
            size_t total_size = 0;
            for (auto & child: children)
                total_size += child->getRequiredSize();
            return total_size;
        }

    private:
        NodeFuncs children;
    };

    class RegexpAlternateFunction : public NodeFunction
    {
    public:
        RegexpAlternateFunction(const Children & children_, const Generators & generators_)
            : children(getFuncs(children_, generators_))
        {
        }

        size_t operator () (char * out, size_t size) override
        {
            std::uniform_int_distribution<int> distribution(0, static_cast<int>(children.size()-1));
            int chosen = distribution(thread_local_rng);
            size_t consumed = children[chosen]->operator()(out, size);
            chassert(consumed <= size);
            return consumed;
        }

        size_t getRequiredSize() override
        {
            size_t total_size = 0;
            for (auto & child: children)
                total_size = std::max(total_size, child->getRequiredSize());
            return total_size;
        }

    private:
        NodeFuncs children;
    };

    class RegexpRepeatFunction : public NodeFunction
    {
    public:
        RegexpRepeatFunction(Regexp * re_, const Generators & generators_, int min_repeat_, int max_repeat_)
            : func(generators_.at(re_))
            , min_repeat(min_repeat_)
            , max_repeat(max_repeat_)
        {
        }

        size_t operator () (char * out, size_t size) override
        {
            std::uniform_int_distribution<int> distribution(min_repeat, max_repeat);
            int ntimes = distribution(thread_local_rng);

            size_t total_size = 0;
            for (int i = 0; i < ntimes; ++i)
            {
                size_t consumed = func->operator()(out, size);
                chassert(consumed <= size);
                out += consumed;
                size -= consumed;
                total_size += consumed;
            }
            return total_size;
        }

        size_t getRequiredSize() override { return max_repeat * func->getRequiredSize(); }

    private:
        NodeFunctionPtr func;
        int min_repeat = 0;
        int max_repeat = 0;
    };

    class RegexpCharClassFunction : public NodeFunction
    {
        using CharRanges = std::vector<std::pair<re2::Rune, re2::Rune>>;

    public:
        explicit RegexpCharClassFunction(Regexp * re_)
        {
            CharClass * cc = re_->cc();
            chassert(cc);
            if (cc->empty())
                throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "kRegexpCharClass is empty");

            char_count = cc->size();
            char_ranges.reserve(std::distance(cc->begin(), cc->end()));

            for (const auto range: *cc)
                char_ranges.emplace_back(range.lo, range.hi);
        }

        size_t operator () (char * out, size_t size) override
        {
            chassert(UTFmax <= size);

            std::uniform_int_distribution<int> distribution(1, char_count);
            int chosen = distribution(thread_local_rng);
            int count_down = chosen;

            auto it = char_ranges.begin();
            for (; it != char_ranges.end(); ++it)
            {
                auto [lo, hi] = *it;
                auto range_len = hi - lo + 1;
                if (count_down <= range_len)
                    break;
                count_down -= range_len;
            }

            if (it == char_ranges.end())
                throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
                    "Unable to choose the rune. Runes {}, ranges {}, chosen {}",
                    char_count, char_ranges.size(), chosen);

            auto [lo, _] = *it;
            Rune r = lo + count_down - 1;
            return re2::runetochar(out, &r);
        }

        size_t getRequiredSize() override { return UTFmax; }

    private:
        int char_count = 0;
        CharRanges char_ranges;
    };

    class RegexpLiteralStringFunction : public NodeFunction
    {
    public:
        explicit RegexpLiteralStringFunction(Regexp * re_)
        {
            if (re_->nrunes() == 0)
                return;

            char buffer[UTFmax];
            for (int i = 0; i < re_->nrunes(); ++i)
            {
                int n = re2::runetochar(buffer, &re_->runes()[i]);
                literal_string += String(buffer, n);
            }
        }

        size_t operator () (char * out, size_t size) override
        {
            chassert(literal_string.size() <= size);
            memcpy(out, literal_string.data(), literal_string.size());
            return literal_string.size();
        }

        size_t getRequiredSize() override { return literal_string.size(); }

    private:
        String literal_string;
    };

    class RegexpLiteralFunction : public NodeFunction
    {
    public:
        explicit RegexpLiteralFunction(Regexp * re_)
        {
            char buffer[UTFmax];

            Rune r = re_->rune();
            int n = re2::runetochar(buffer, &r);
            literal = String(buffer, n);
        }

        size_t operator () (char * out, size_t size) override
        {
            chassert(literal.size() <= size);
            memcpy(out, literal.data(), literal.size());
            return literal.size();
        }

        size_t getRequiredSize() override { return literal.size(); }

    private:
        String literal;
    };

    class ThrowExceptionFunction : public NodeFunction
    {
    public:
        explicit ThrowExceptionFunction(Regexp * re_)
            : operation(magic_enum::enum_name(re_->op()))
        {
        }

        size_t operator () (char *, size_t) override
        {
            throw DB::Exception(
                DB::ErrorCodes::BAD_ARGUMENTS,
                "RandomStringPrepareWalker: regexp node '{}' is not supported for generating a random match",
                operation);
        }

        size_t getRequiredSize() override { return 0; }

    private:
        String operation;
    };

public:
    std::function<String()> getGenerator()
    {
        if (root == nullptr)
            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "no root has been set");

        if (generators.empty())
            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "no generators");

        auto root_func = generators.at(root);
        auto required_buffer_size = root_func->getRequiredSize();
        auto generator_func = [=] () -> String
        {
            auto buffer = String(required_buffer_size, '\0');
            size_t size = root_func->operator()(buffer.data(), buffer.size());
            buffer.resize(size);
            return buffer;
        };

        root = nullptr;
        generators = {};

        return std::move(generator_func);
    }

private:
    Children CopyChildrenArgs(Regexp ** children, int nchild)
    {
        Children result;
        result.reserve(nchild);
        for (int i = 0; i < nchild; ++i)
            result.push_back(Copy(children[i]));
        return result;
    }

    Regexp * ShortVisit(Regexp* /*re*/, Regexp * /*parent_arg*/) override
    {
        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "ShortVisit should not be called");
    }

    Regexp * PreVisit(Regexp * re, Regexp * parent_arg, bool* /*stop*/) override /*noexcept*/
    {
        if (parent_arg == nullptr)
        {
            chassert(root == nullptr);
            chassert(re != nullptr);
            root = re;
        }

        return re;
    }

    Regexp * PostVisit(Regexp * re, Regexp * /*parent_arg*/, Regexp * pre_arg,
                       Regexp ** child_args, int nchild_args) override /*noexcept*/
    {
        switch (re->op())
        {
            case kRegexpConcat: // Matches concatenation of sub_[0..nsub-1].
                generators[re] = std::make_shared<RegexpConcatFunction>(CopyChildrenArgs(child_args, nchild_args), generators);
                break;
            case kRegexpAlternate: // Matches union of sub_[0..nsub-1].
                generators[re] = std::make_shared<RegexpAlternateFunction>(CopyChildrenArgs(child_args, nchild_args), generators);
                break;
            case kRegexpQuest: // Matches sub_[0] zero or one times.
                chassert(nchild_args == 1);
                generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 0, 1);
                break;
            case kRegexpStar: // Matches sub_[0] zero or more times.
                chassert(nchild_args == 1);
                generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 0, ImplicitMax);
                break;
            case kRegexpPlus: // Matches sub_[0] one or more times.
                chassert(nchild_args == 1);
                generators[re] = std::make_shared<RegexpRepeatFunction>(child_args[0], generators, 1, ImplicitMax);
                break;
            case kRegexpCharClass: // Matches character class given by cc_.
                chassert(nchild_args == 0);
                generators[re] = std::make_shared<RegexpCharClassFunction>(re);
                break;
            case kRegexpLiteralString: // Matches runes_.
                chassert(nchild_args == 0);
                generators[re] = std::make_shared<RegexpLiteralStringFunction>(re);
                break;
            case kRegexpLiteral: // Matches rune_.
                chassert(nchild_args == 0);
                generators[re] = std::make_shared<RegexpLiteralFunction>(re);
                break;
            case kRegexpCapture: // Parenthesized (capturing) subexpression.
                chassert(nchild_args == 1);
                generators[re] = generators.at(child_args[0]);
                break;

            case kRegexpNoMatch: // Matches no strings.
            case kRegexpEmptyMatch: // Matches empty string.
            case kRegexpRepeat: // Matches sub_[0] at least min_ times, at most max_ times.
            case kRegexpAnyChar: // Matches any character.
            case kRegexpAnyByte: // Matches any byte [sic].
            case kRegexpBeginLine: // Matches empty string at beginning of line.
            case kRegexpEndLine: // Matches empty string at end of line.
            case kRegexpWordBoundary: // Matches word boundary "\b".
            case kRegexpNoWordBoundary: // Matches not-a-word boundary "\B".
            case kRegexpBeginText: // Matches empty string at beginning of text.
            case kRegexpEndText: // Matches empty string at end of text.
            case kRegexpHaveMatch: // Forces match of entire expression
                generators[re] = std::make_shared<ThrowExceptionFunction>(re);
        }

        return pre_arg;
    }

    Regexp * root = nullptr;
    Generators generators;
};

}


namespace DB
{

void RandomStringGeneratorByRegexp::RegexpPtrDeleter::operator() (re2::Regexp * re) const noexcept
{
    re->Decref();
}

RandomStringGeneratorByRegexp::RandomStringGeneratorByRegexp(const String & re_str)
{
    re2::RE2::Options options;
    options.set_case_sensitive(true);
    options.set_encoding(re2::RE2::Options::EncodingLatin1);
    auto flags = static_cast<re2::Regexp::ParseFlags>(options.ParseFlags());

    re2::RegexpStatus status;
    regexp.reset(re2::Regexp::Parse(re_str, flags, &status));

    if (!regexp)
        throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
            "Error parsing regexp '{}': {}",
            re_str, status.Text());

    regexp.reset(regexp->Simplify());

    auto walker = re2::RandomStringPrepareWalker();
    walker.Walk(regexp.get(), {});
    generatorFunc = walker.getGenerator();

    {
        auto test_check = generate();
        auto matched = RE2::FullMatch(test_check, re2::RE2(re_str));
        if (!matched)
            throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
                "Generator is unable to produce random string for regexp '{}': {}",
                re_str, test_check);
    }
}

String RandomStringGeneratorByRegexp::generate() const
{
    chassert(generatorFunc);
    return generatorFunc();
}

}
src/Common/MatchGenerator.h (new file, 31 lines)
@@ -0,0 +1,31 @@
#pragma once

#include <base/types.h>
#include <memory>

namespace re2
{
class Regexp;
}

namespace DB
{

class RandomStringGeneratorByRegexp
{
public:
    explicit RandomStringGeneratorByRegexp(const String & re_str);
    String generate() const;

private:
    struct RegexpPtrDeleter
    {
        void operator()(re2::Regexp * re) const noexcept;
    };
    using RegexpPtr = std::unique_ptr<re2::Regexp, RegexpPtrDeleter>;

    RegexpPtr regexp;
    std::function<String()> generatorFunc;
};

}
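A hedged usage sketch of the class declared above; the regular expression is illustrative only, and the constructor may throw `BAD_ARGUMENTS` for patterns the walker does not support (anchors, backreferences, etc.):

```cpp
#include <Common/MatchGenerator.h>
#include <iostream>

int main()
{
    // Build a generator once, then draw as many matching strings as needed.
    DB::RandomStringGeneratorByRegexp gen("[a-z]{3}/[a-z0-9]{29}");
    for (int i = 0; i < 3; ++i)
        std::cout << gen.generate() << '\n'; // each line matches the template
}
```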
@@ -65,4 +65,5 @@ ObjectStorageKey ObjectStorageKey::createAsAbsolute(String key_)
     object_key.is_relative = false;
     return object_key;
 }
+
 }
src/Common/ObjectStorageKeyGenerator.cpp (new file, 94 lines)
@@ -0,0 +1,94 @@
#include "ObjectStorageKeyGenerator.h"

#include <Common/getRandomASCIIString.h>
#include <Common/MatchGenerator.h>

#include <fmt/format.h>


class GeneratorWithTemplate : public DB::IObjectStorageKeysGenerator
{
public:
    explicit GeneratorWithTemplate(String key_template_)
        : key_template(std::move(key_template_))
        , re_gen(key_template)
    {
    }
    DB::ObjectStorageKey generate(const String &) const override
    {
        return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate());
    }

private:
    String key_template;
    DB::RandomStringGeneratorByRegexp re_gen;
};


class GeneratorWithPrefix : public DB::IObjectStorageKeysGenerator
{
public:
    explicit GeneratorWithPrefix(String key_prefix_)
        : key_prefix(std::move(key_prefix_))
    {}

    DB::ObjectStorageKey generate(const String &) const override
    {
        /// Path to store the new S3 object.

        /// Total length is 32 a-z characters for enough randomness.
        /// First 3 characters are used as a prefix for
        /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/
        constexpr size_t key_name_total_size = 32;
        constexpr size_t key_name_prefix_size = 3;

        /// Path to store new S3 object.
        String key = fmt::format("{}/{}",
            DB::getRandomASCIIString(key_name_prefix_size),
            DB::getRandomASCIIString(key_name_total_size - key_name_prefix_size));

        /// what ever key_prefix value is, consider that key as relative
        return DB::ObjectStorageKey::createAsRelative(key_prefix, key);
    }

private:
    String key_prefix;
};


class GeneratorAsIsWithPrefix : public DB::IObjectStorageKeysGenerator
{
public:
    explicit GeneratorAsIsWithPrefix(String key_prefix_)
        : key_prefix(std::move(key_prefix_))
    {}

    DB::ObjectStorageKey generate(const String & path) const override
    {
        return DB::ObjectStorageKey::createAsRelative(key_prefix, path);
    }

private:
    String key_prefix;
};


namespace DB
{

ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorAsIsWithPrefix(String key_prefix)
{
    return std::make_shared<GeneratorAsIsWithPrefix>(std::move(key_prefix));
}

ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByPrefix(String key_prefix)
{
    return std::make_shared<GeneratorWithPrefix>(std::move(key_prefix));
}

ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByTemplate(String key_template)
{
    return std::make_shared<GeneratorWithTemplate>(std::move(key_template));
}

}
src/Common/ObjectStorageKeyGenerator.h (new file, 22 lines)
@@ -0,0 +1,22 @@
#pragma once

#include "ObjectStorageKey.h"
#include <memory>

namespace DB
{

class IObjectStorageKeysGenerator
{
public:
    virtual ObjectStorageKey generate(const String & path) const = 0;
    virtual ~IObjectStorageKeysGenerator() = default;
};

using ObjectStorageKeysGeneratorPtr = std::shared_ptr<IObjectStorageKeysGenerator>;

ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorAsIsWithPrefix(String key_prefix);
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByPrefix(String key_prefix);
ObjectStorageKeysGeneratorPtr createObjectStorageKeysGeneratorByTemplate(String key_template);

}
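A minimal sketch of selecting one of the factory functions declared above; the template string and path are illustrative, and the behavior follows the generator implementations shown earlier (the template-based generator ignores the passed-in path and returns an absolute key drawn from the pattern):

```cpp
// Assumed illustrative template; real deployments would configure this per disk.
DB::ObjectStorageKeysGeneratorPtr keys_gen =
    DB::createObjectStorageKeysGeneratorByTemplate("[a-z]{3}/[a-z]{29}");

DB::ObjectStorageKey key = keys_gen->generate("store/data.bin");
// key.serialize() now yields an absolute object key matching the template,
// independent of the "store/data.bin" argument.
```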
@@ -391,6 +391,9 @@ The server successfully detected this situation and will download merged part fr
     M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
     M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
     \
+    M(S3Clients, "Number of created S3 clients.") \
+    M(TinyS3Clients, "Number of S3 clients copies which reuse an existing auth provider from another client.") \
+    \
     M(EngineFileLikeReadFiles, "Number of files read in table engines working with files (like File/S3/URL/HDFS).") \
     \
     M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \
@@ -296,6 +296,9 @@ constexpr std::pair<std::string_view, std::string_view> replacements[]
 // Replace parts from @c replacements with shorter aliases
 String demangleAndCollapseNames(std::string_view file, const char * const symbol_name)
 {
+    if (!symbol_name)
+        return "?";
+
     std::string_view file_copy = file;
     if (auto trim_pos = file.find_last_of('/'); trim_pos != file.npos)
         file_copy.remove_suffix(file.size() - trim_pos);
src/Common/randomNumber.h (new file, 12 lines)
@@ -0,0 +1,12 @@
#pragma once

#include <Common/randomSeed.h>
#include <pcg_random.hpp>

inline UInt32 randomNumber()
{
    pcg64_fast rng{randomSeed()};
    std::uniform_int_distribution<pcg64_fast::result_type> dist6(
        std::numeric_limits<UInt32>::min(), std::numeric_limits<UInt32>::max());
    return static_cast<UInt32>(dist6(rng));
}
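A short usage note on the helper above, as an assumption about intended use rather than a statement from the commit: it constructs and seeds a fresh `pcg64_fast` on every call, which keeps it dependency-free but makes it better suited to occasional calls (e.g. jittered retry delays) than to tight loops.

```cpp
// Illustrative call site: pick a random backoff between 0 and 999 ms.
UInt32 sleep_ms = randomNumber() % 1000;
```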
@@ -2,6 +2,8 @@
 #include <gtest/gtest.h>

 #include <array>
+#include <atomic>
+#include <exception>
 #include <list>
 #include <barrier>
 #include <chrono>
@@ -544,6 +546,99 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
     }
 }
+
+TEST(AsyncLoader, IgnoreDependencyFailure)
+{
+    AsyncLoaderTest t;
+    std::atomic<bool> success{false};
+    t.loader.start();
+
+    std::string_view error_message = "test job failure";
+
+    auto failed_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
+        throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message);
+    };
+    auto dependent_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
+        success.store(true);
+    };
+
+    auto failed_job = makeLoadJob({}, "failed_job", failed_job_func);
+    auto dependent_job = makeLoadJob({failed_job},
+        "dependent_job", ignoreDependencyFailure, dependent_job_func);
+    auto task = t.schedule({ failed_job, dependent_job });
+
+    t.loader.wait();
+
+    ASSERT_EQ(failed_job->status(), LoadStatus::FAILED);
+    ASSERT_EQ(dependent_job->status(), LoadStatus::OK);
+    ASSERT_EQ(success.load(), true);
+}
+
+TEST(AsyncLoader, CustomDependencyFailure)
+{
+    AsyncLoaderTest t(16);
+    int error_count = 0;
+    std::atomic<size_t> good_count{0};
+    std::barrier canceled_sync(4);
+    t.loader.start();
+
+    std::string_view error_message = "test job failure";
+
+    auto evil_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) {
+        throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message);
+    };
+    auto good_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) {
+        good_count++;
+    };
+    auto late_dep_func = [&] (AsyncLoader &, const LoadJobPtr &) {
+        canceled_sync.arrive_and_wait(); // wait for fail (A) before this job is finished
+    };
+    auto collect_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
+        FAIL(); // job should be canceled, so we never get here
+    };
+    auto dependent_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
+        FAIL(); // job should be canceled, so we never get here
+    };
+    auto fail_after_two = [&] (const LoadJobPtr & self, const LoadJobPtr &, std::exception_ptr & cancel) {
+        if (++error_count == 2)
+            cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
+                "Load job '{}' canceled: too many dependencies have failed",
+                self->name));
+    };
+
+    auto evil_dep1 = makeLoadJob({}, "evil_dep1", evil_dep_func);
+    auto evil_dep2 = makeLoadJob({}, "evil_dep2", evil_dep_func);
+    auto evil_dep3 = makeLoadJob({}, "evil_dep3", evil_dep_func);
+    auto good_dep1 = makeLoadJob({}, "good_dep1", good_dep_func);
+    auto good_dep2 = makeLoadJob({}, "good_dep2", good_dep_func);
+    auto good_dep3 = makeLoadJob({}, "good_dep3", good_dep_func);
+    auto late_dep1 = makeLoadJob({}, "late_dep1", late_dep_func);
+    auto late_dep2 = makeLoadJob({}, "late_dep2", late_dep_func);
+    auto late_dep3 = makeLoadJob({}, "late_dep3", late_dep_func);
+    auto collect_job = makeLoadJob({
+            evil_dep1, evil_dep2, evil_dep3,
+            good_dep1, good_dep2, good_dep3,
+            late_dep1, late_dep2, late_dep3
+        }, "collect_job", fail_after_two, collect_job_func);
+    auto dependent_job1 = makeLoadJob({ collect_job }, "dependent_job1", dependent_job_func);
+    auto dependent_job2 = makeLoadJob({ collect_job }, "dependent_job2", dependent_job_func);
+    auto dependent_job3 = makeLoadJob({ collect_job }, "dependent_job3", dependent_job_func);
+    auto task = t.schedule({ dependent_job1, dependent_job2, dependent_job3 }); // Other jobs should be discovery automatically
+
+    t.loader.wait(collect_job, true);
+    canceled_sync.arrive_and_wait(); // (A)
+
+    t.loader.wait();
+
+    ASSERT_EQ(late_dep1->status(), LoadStatus::OK);
+    ASSERT_EQ(late_dep2->status(), LoadStatus::OK);
+    ASSERT_EQ(late_dep3->status(), LoadStatus::OK);
+    ASSERT_EQ(collect_job->status(), LoadStatus::CANCELED);
+    ASSERT_EQ(dependent_job1->status(), LoadStatus::CANCELED);
+    ASSERT_EQ(dependent_job2->status(), LoadStatus::CANCELED);
+    ASSERT_EQ(dependent_job3->status(), LoadStatus::CANCELED);
+    ASSERT_EQ(good_count.load(), 3);
+}
+
 TEST(AsyncLoader, TestConcurrency)
 {
     AsyncLoaderTest t(10);
src/Common/tests/gtest_generate_random_by_regexp.cpp (new file, 101 lines)
@@ -0,0 +1,101 @@
+#include <Common/MatchGenerator.h>
+#include <Common/ObjectStorageKeyGenerator.h>
+#include <Common/Stopwatch.h>
+#include <Common/Exception.h>
+
+#include <gtest/gtest.h>
+
+void routine(String s)
+{
+    std::cerr << "case '"<< s << "'";
+    auto gen = DB::RandomStringGeneratorByRegexp(s);
+    [[maybe_unused]] auto res = gen.generate();
+    std::cerr << " result '"<< res << "'" << std::endl;
+}
+
+TEST(GenerateRandomString, Positive)
+{
+    routine(".");
+    routine("[[:xdigit:]]");
+    routine("[0-9a-f]");
+    routine("[a-z]");
+    routine("prefix-[0-9a-f]-suffix");
+    routine("prefix-[a-z]-suffix");
+    routine("[0-9a-f]{3}");
+    routine("prefix-[0-9a-f]{3}-suffix");
+    routine("prefix-[a-z]{3}-suffix/[0-9a-f]{20}");
+    routine("left|right");
+    routine("[a-z]{0,3}");
+    routine("just constant string");
+    routine("[a-z]?");
+    routine("[a-z]*");
+    routine("[a-z]+");
+    routine("[^a-z]");
+    routine("[[:lower:]]{3}/suffix");
+    routine("prefix-(A|B|[0-9a-f]){3}");
+    routine("mergetree/[a-z]{3}/[a-z]{29}");
+}
+
+TEST(GenerateRandomString, Negative)
+{
+    EXPECT_THROW(routine("[[:do_not_exists:]]"), DB::Exception);
+    EXPECT_THROW(routine("[:do_not_exis..."), DB::Exception);
+    EXPECT_THROW(routine("^abc"), DB::Exception);
+}
+
+TEST(GenerateRandomString, DifferentResult)
+{
+    std::cerr << "100 different keys" << std::endl;
+    auto gen = DB::RandomStringGeneratorByRegexp("prefix-[a-z]{3}-suffix/[0-9a-f]{20}");
+    std::set<String> deduplicate;
+    for (int i = 0; i < 100; ++i)
+        ASSERT_TRUE(deduplicate.insert(gen.generate()).second);
+    std::cerr << "100 different keys: ok" << std::endl;
+}
+
+TEST(GenerateRandomString, FullRange)
+{
+    std::cerr << "all possible letters" << std::endl;
+    auto gen = DB::RandomStringGeneratorByRegexp("[a-z]");
+    std::set<String> deduplicate;
+    int count = 'z' - 'a' + 1;
+    while (deduplicate.size() < count)
+        if (deduplicate.insert(gen.generate()).second)
+            std::cerr << " +1 ";
+    std::cerr << "all possible letters, ok" << std::endl;
+}
+
+UInt64 elapsed(DB::ObjectStorageKeysGeneratorPtr generator)
+{
+    String path = "some_path";
+
+    Stopwatch watch;
+
+    for (int i = 0; i < 100000; ++i)
+    {
+        [[ maybe_unused ]] auto result = generator->generate(path).serialize();
+    }
+
+    return watch.elapsedMicroseconds();
+}
+
+TEST(ObjectStorageKey, Performance)
+{
+    auto elapsed_old = elapsed(DB::createObjectStorageKeysGeneratorByPrefix(
+        "xx-xx-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/mergetree/"));
+    std::cerr << "old: " << elapsed_old << std::endl;
+
+    auto elapsed_new = elapsed(DB::createObjectStorageKeysGeneratorByTemplate(
+        "xx-xx-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/mergetree/[a-z]{3}/[a-z]{29}"));
+    std::cerr << "new: " << elapsed_new << std::endl;
+
+    if (elapsed_new > elapsed_old)
+    {
+        if (elapsed_new > elapsed_old)
+            std::cerr << "slow ratio: +" << float(elapsed_new) / elapsed_old << std::endl;
+        else
+            std::cerr << "fast ratio: " << float(elapsed_old) / elapsed_new << std::endl;
+        ASSERT_LT(elapsed_new, 1.2 * elapsed_old);
+    }
+
+}
@@ -1,4 +1,4 @@
-#include <Compression/ICompressionCodec.h>
+#include <Compression/CompressionCodecZSTD.h>
 #include <Compression/CompressionInfo.h>
 #include <Compression/CompressionFactory.h>
 #include <zstd.h>
@@ -9,42 +9,11 @@
 #include <IO/WriteHelpers.h>
 #include <IO/WriteBuffer.h>
 #include <IO/BufferWithOwnMemory.h>
+#include <Poco/Logger.h>
+#include <Common/logger_useful.h>

 namespace DB
 {

-class CompressionCodecZSTD : public ICompressionCodec
-{
-public:
-    static constexpr auto ZSTD_DEFAULT_LEVEL = 1;
-    static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24;
-
-    explicit CompressionCodecZSTD(int level_);
-    CompressionCodecZSTD(int level_, int window_log);
-
-    uint8_t getMethodByte() const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-
-private:
-    const int level;
-    const bool enable_long_range;
-    const int window_log;
-};
-
-
 namespace ErrorCodes
 {
     extern const int CANNOT_COMPRESS;
@@ -82,7 +51,7 @@ UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_s
     ZSTD_freeCCtx(cctx);

     if (ZSTD_isError(compressed_size))
-        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with ZSTD codec: {}", std::string(ZSTD_getErrorName(compressed_size)));
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with ZSTD codec: {}", ZSTD_getErrorName(compressed_size));

     return static_cast<UInt32>(compressed_size);
 }
@@ -96,13 +65,19 @@ void CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_s
         throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress ZSTD-encoded data: {}", std::string(ZSTD_getErrorName(res)));
 }

-CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) : level(level_), enable_long_range(true), window_log(window_log_)
+CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_)
+    : level(level_)
+    , enable_long_range(true)
+    , window_log(window_log_)
 {
     setCodecDescription(
         "ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level)), std::make_shared<ASTLiteral>(static_cast<UInt64>(window_log))});
 }

-CompressionCodecZSTD::CompressionCodecZSTD(int level_) : level(level_), enable_long_range(false), window_log(0)
+CompressionCodecZSTD::CompressionCodecZSTD(int level_)
+    : level(level_)
+    , enable_long_range(false)
+    , window_log(0)
 {
     setCodecDescription("ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
 }
src/Compression/CompressionCodecZSTD.h (new file, 38 lines)
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <Compression/ICompressionCodec.h>
+
+namespace DB
+{
+
+class CompressionCodecZSTD : public ICompressionCodec
+{
+public:
+    static constexpr auto ZSTD_DEFAULT_LEVEL = 1;
+    static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24;
+
+    explicit CompressionCodecZSTD(int level_);
+    CompressionCodecZSTD(int level_, int window_log);
+
+    uint8_t getMethodByte() const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return true; }
+
+private:
+    const int level;
+    const bool enable_long_range;
+    const int window_log;
+};
+
+}
src/Compression/CompressionCodecZSTDQAT.cpp (new file, 113 lines)
@@ -0,0 +1,113 @@
+#ifdef ENABLE_ZSTD_QAT_CODEC
+
+#include <Common/logger_useful.h>
+#include <Compression/CompressionCodecZSTD.h>
+#include <Compression/CompressionFactory.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/IAST.h>
+#include <Poco/Logger.h>
+#include <qatseqprod.h>
+#include <zstd.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int CANNOT_COMPRESS;
+    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
+    extern const int ILLEGAL_CODEC_PARAMETER;
+}
+
+/// Hardware-accelerated ZSTD. Supports only compression so far.
+class CompressionCodecZSTDQAT : public CompressionCodecZSTD
+{
+public:
+    static constexpr auto ZSTDQAT_SUPPORTED_MIN_LEVEL = 1;
+    static constexpr auto ZSTDQAT_SUPPORTED_MAX_LEVEL = 12;
+    static constexpr int ZSTDQAT_DEVICE_UNINITIALIZED = 0XFFFF;
+
+    explicit CompressionCodecZSTDQAT(int level_);
+
+protected:
+    bool isZstdQat() const override { return true; }
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+private:
+    const int level;
+    Poco::Logger * log;
+    static std::atomic<int> qat_state; /// Global initialization status of QAT device, we fall back back to software compression if uninitialized
+};
+
+std::atomic<int> CompressionCodecZSTDQAT::qat_state = ZSTDQAT_DEVICE_UNINITIALIZED;
+
+UInt32 CompressionCodecZSTDQAT::doCompressData(const char * source, UInt32 source_size, char * dest) const
+{
+    if (qat_state == ZSTDQAT_DEVICE_UNINITIALIZED)
+    {
+        qat_state = QZSTD_startQatDevice();
+        if (qat_state == QZSTD_OK)
+            LOG_DEBUG(log, "Initialization of hardware-assissted ZSTD_QAT codec successful");
+        else
+            LOG_WARNING(log, "Initialization of hardware-assisted ZSTD_QAT codec failed, falling back to software ZSTD codec -> status: {}", qat_state);
+    }
+
+    ZSTD_CCtx * cctx = ZSTD_createCCtx();
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
+
+    void * sequence_producer_state = nullptr;
+    if (qat_state == QZSTD_OK)
+    {
+        sequence_producer_state = QZSTD_createSeqProdState();
+        ZSTD_registerSequenceProducer(cctx, sequence_producer_state, qatSequenceProducer);
+        ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1);
+    }
+
+    size_t compressed_size = ZSTD_compress2(cctx, dest, ZSTD_compressBound(source_size), source, source_size);
+    QZSTD_freeSeqProdState(sequence_producer_state);
+    ZSTD_freeCCtx(cctx);
+
+    if (ZSTD_isError(compressed_size))
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with ZSTD_QAT codec: {}", ZSTD_getErrorName(compressed_size));
+
+    return static_cast<UInt32>(compressed_size);
+}
+
+void registerCodecZSTDQAT(CompressionCodecFactory & factory)
+{
+    UInt8 method_code = static_cast<UInt8>(CompressionMethodByte::ZSTD_QPL);
+    factory.registerCompressionCodec("ZSTD_QAT", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr
+    {
+        int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL;
+        if (arguments && !arguments->children.empty())
+        {
+            if (arguments->children.size() > 1)
+                throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "ZSTD_QAT codec must have 1 parameter, given {}", arguments->children.size());
+
+            const auto children = arguments->children;
+            const auto * literal = children[0]->as<ASTLiteral>();
+            if (!literal)
+                throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "ZSTD_QAT codec argument must be integer");
+
+            level = static_cast<int>(literal->value.safeGet<UInt64>());
+            if (level < CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MIN_LEVEL || level > CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MAX_LEVEL)
+                /// that's a hardware limitation
+                throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER,
+                    "ZSTDQAT codec doesn't support level more than {} and lower than {} , given {}",
+                    CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MAX_LEVEL, CompressionCodecZSTDQAT::ZSTDQAT_SUPPORTED_MIN_LEVEL, level);
+        }
+
+        return std::make_shared<CompressionCodecZSTDQAT>(level);
+    });
+}
+
+CompressionCodecZSTDQAT::CompressionCodecZSTDQAT(int level_)
+    : CompressionCodecZSTD(level_)
+    , level(level_)
+    , log(&Poco::Logger::get("CompressionCodecZSTDQAT"))
+{
+    setCodecDescription("ZSTD_QAT", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
+}
+
+}
+
+#endif
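For context, a minimal usage sketch of the codec registered above (not part of this commit): it assumes a build with `ENABLE_ZSTD_QAT_CODEC` and QAT hardware (otherwise compression falls back to software ZSTD); the table and column names are hypothetical, and the level must stay within the 1..12 range checked by the factory lambda.

```sql
-- Hypothetical usage sketch for the new ZSTD_QAT codec.
SET enable_zstd_qat_codec = 1;

CREATE TABLE test_zstd_qat
(
    id UInt64,
    payload String CODEC(ZSTD_QAT(3))  -- level must be within 1..12 (ZSTDQAT_SUPPORTED_MIN_LEVEL..MAX_LEVEL)
)
ENGINE = MergeTree
ORDER BY id;
```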
@@ -167,6 +167,9 @@ void registerCodecNone(CompressionCodecFactory & factory);
 void registerCodecLZ4(CompressionCodecFactory & factory);
 void registerCodecLZ4HC(CompressionCodecFactory & factory);
 void registerCodecZSTD(CompressionCodecFactory & factory);
+#ifdef ENABLE_ZSTD_QAT_CODEC
+void registerCodecZSTDQAT(CompressionCodecFactory & factory);
+#endif
 void registerCodecMultiple(CompressionCodecFactory & factory);
 #ifdef ENABLE_QPL_COMPRESSION
 void registerCodecDeflateQpl(CompressionCodecFactory & factory);
@@ -189,6 +192,9 @@ CompressionCodecFactory::CompressionCodecFactory()
     registerCodecNone(*this);
     registerCodecLZ4(*this);
     registerCodecZSTD(*this);
+#ifdef ENABLE_ZSTD_QAT_CODEC
+    registerCodecZSTDQAT(*this);
+#endif
     registerCodecLZ4HC(*this);
     registerCodecMultiple(*this);
 #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD
@@ -40,10 +40,10 @@ public:
     CompressionCodecPtr getDefaultCodec() const;

     /// Validate codecs AST specified by user and parses codecs description (substitute default parameters)
-    ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const;
+    ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const;

     /// Validate codecs AST specified by user
-    void validateCodec(const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const;
+    void validateCodec(const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const;

     /// Get codec by AST and possible column_type. Some codecs can use
     /// information about type to improve inner settings, but every codec should
@@ -34,7 +34,7 @@ namespace ErrorCodes

 void CompressionCodecFactory::validateCodec(
-    const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const
+    const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const
 {
     if (family_name.empty())
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec name cannot be empty");
@@ -43,13 +43,13 @@ void CompressionCodecFactory::validateCodec(
     {
         auto literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
         validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)),
-            {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec);
+            {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
     }
     else
     {
         auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
         validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier),
-            {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec);
+            {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
     }
 }

@@ -77,7 +77,7 @@ bool innerDataTypeIsFloat(const DataTypePtr & type)
 }

 ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
-    const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const
+    const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const
 {
     if (const auto * func = ast->as<ASTFunction>())
     {
@@ -165,6 +165,12 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
                     " You can enable it with the 'enable_deflate_qpl_codec' setting.",
                     codec_family_name);
+
+            if (!enable_zstd_qat_codec && result_codec->isZstdQat())
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "Codec {} is disabled by default."
+                    " You can enable it with the 'enable_zstd_qat_codec' setting.",
+                    codec_family_name);

             codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
         }

@@ -48,6 +48,7 @@ enum class CompressionMethodByte : uint8_t
     FPC = 0x98,
     DeflateQpl = 0x99,
     GCD = 0x9a,
+    ZSTD_QPL = 0x9b,
 };

 }
@@ -121,6 +121,9 @@ public:
     /// Is this the DEFLATE_QPL codec?
     virtual bool isDeflateQpl() const { return false; }
+
+    /// Is this the ZSTD_QAT codec?
+    virtual bool isZstdQat() const { return false; }

     /// If it does nothing.
     virtual bool isNone() const { return false; }

@@ -70,7 +70,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
     {
         std::lock_guard client_lock{snapshot_s3_client_mutex};
         // if client is not changed (same auth settings, same endpoint) we don't need to update
-        if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings
+        if (snapshot_s3_client && snapshot_s3_client->client && !snapshot_s3_client->auth_settings.hasUpdates(auth_settings)
             && snapshot_s3_client->uri.uri == new_uri.uri)
             return;
     }
@@ -188,4 +188,46 @@ String GTIDSets::toPayload() const
     return buffer.str();
 }
+
+bool GTIDSet::contains(const GTIDSet & gtid_set) const
+{
+    //we contain the other set if each of its intervals are contained in any of our intervals.
+    //use the fact that intervals are sorted to make this linear instead of quadratic.
+    if (uuid != gtid_set.uuid) { return false; }
+
+    auto mine = intervals.begin(), other = gtid_set.intervals.begin();
+    auto my_end = intervals.end(), other_end = gtid_set.intervals.end();
+    while (mine != my_end && other != other_end)
+    {
+        bool mine_contains_other = mine->start <= other->start && mine->end >= other->end;
+        if (mine_contains_other)
+        {
+            ++other;
+        }
+        else
+        {
+            ++mine;
+        }
+    }
+
+    return other == other_end; //if we've iterated through all intervals in the argument, all its intervals are contained in this
+}
+
+bool GTIDSets::contains(const GTIDSet & gtid_set) const
+{
+    for (const auto & my_gtid_set : sets)
+    {
+        if (my_gtid_set.contains(gtid_set)) { return true; }
+    }
+    return false;
+}
+
+bool GTIDSets::contains(const GTIDSets & gtid_sets) const
+{
+    for (const auto & gtid_set : gtid_sets.sets)
+    {
+        if (!this->contains(gtid_set)) { return false; }
+    }
+    return true;
+}
+
 }
@@ -28,6 +28,8 @@ public:
     void tryMerge(size_t i);

     static void tryShrink(GTIDSet & set, unsigned int i, Interval & current);
+
+    bool contains(const GTIDSet & gtid_set) const;
 };

 class GTIDSets
@@ -40,6 +42,31 @@ public:

     String toString() const;
     String toPayload() const;
+    bool contains(const GTIDSet & gtid_set) const;
+    bool contains(const GTIDSets & gtid_sets) const;
 };
+
+inline bool operator==(const GTID & left, const GTID & right)
+{
+    return left.uuid == right.uuid
+        && left.seq_no == right.seq_no;
+}
+
+inline bool operator==(const GTIDSet::Interval & left, const GTIDSet::Interval & right)
+{
+    return left.start == right.start
+        && left.end == right.end;
+}
+
+inline bool operator==(const GTIDSet & left, const GTIDSet & right)
+{
+    return left.uuid == right.uuid
+        && left.intervals == right.intervals;
+}
+
+inline bool operator==(const GTIDSets & left, const GTIDSets & right)
+{
+    return left.sets == right.sets;
+}
+
 }
@@ -967,6 +967,59 @@ namespace MySQLReplication
         out << "[DryRun Event]" << '\n';
     }
+
+    void UnparsedRowsEvent::dump(WriteBuffer & out) const
+    {
+        std::lock_guard lock(mutex);
+        header.dump(out);
+        out << "[UnparsedRowsEvent Event]" << '\n';
+        out << "Unparsed Data Size: " << unparsed_data.size() << '\n';
+    }
+
+    void UnparsedRowsEvent::parseImpl(ReadBuffer & payload_)
+    {
+        char c = 0;
+        if (payload_.position() < payload_.buffer().end())
+            unparsed_data.reserve(payload_.buffer().end() - payload_.position());
+        /// Prevent reading after the end
+        /// payload.available() might have incorrect value
+        while (payload_.position() <= payload_.buffer().end() && payload_.read(c))
+            unparsed_data.push_back(c);
+        if (!payload_.eof())
+            throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data. Available {} bytes but not eof", payload_.available());
+    }
+
+    std::shared_ptr<RowsEvent> UnparsedRowsEvent::parse()
+    {
+        std::lock_guard lock(mutex);
+        if (!unparsed_data.empty())
+        {
+            RowsEventHeader rows_header(header.type);
+            rows_header.table_id = table_id;
+            rows_header.flags = flags;
+            switch (header.type)
+            {
+                case WRITE_ROWS_EVENT_V1:
+                case WRITE_ROWS_EVENT_V2:
+                    parsed_event = std::make_shared<WriteRowsEvent>(table_map, EventHeader(header), rows_header);
+                    break;
+                case DELETE_ROWS_EVENT_V1:
+                case DELETE_ROWS_EVENT_V2:
+                    parsed_event = std::make_shared<DeleteRowsEvent>(table_map, EventHeader(header), rows_header);
+                    break;
+                case UPDATE_ROWS_EVENT_V1:
+                case UPDATE_ROWS_EVENT_V2:
+                    parsed_event = std::make_shared<UpdateRowsEvent>(table_map, EventHeader(header), rows_header);
+                    break;
+                default:
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown event type: {}", magic_enum::enum_name(header.type));
+            }
+            ReadBufferFromMemory payload(unparsed_data.data(), unparsed_data.size());
+            parsed_event->parseEvent(payload);
+            unparsed_data.clear();
+        }
+        return parsed_event;
+    }
+
     /// Update binlog name/position/gtid based on the event type.
     void Position::update(BinlogEventPtr event)
     {
@@ -998,7 +1051,8 @@ namespace MySQLReplication
             case ROTATE_EVENT: {
                 auto rotate = std::static_pointer_cast<RotateEvent>(event);
                 binlog_name = rotate->next_binlog;
-                binlog_pos = event->header.log_pos;
+                /// If binlog name has changed, need to use position from next binlog
+                binlog_pos = rotate->position;
                 break;
             }
             case GTID_EVENT: {
@@ -1012,13 +1066,18 @@ namespace MySQLReplication
             default:
                 throw ReplicationError(ErrorCodes::LOGICAL_ERROR, "Position update with unsupported event");
         }
+        if (event->header.timestamp > 0)
+        {
+            timestamp = event->header.timestamp;
+        }
     }

-    void Position::update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_)
+    void Position::update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_, UInt32 binlog_time_)
     {
         binlog_pos = binlog_pos_;
         binlog_name = binlog_name_;
         gtid_sets.parse(gtid_sets_);
+        timestamp = binlog_time_;
     }

     void Position::dump(WriteBuffer & out) const
@@ -181,6 +181,7 @@ namespace MySQLReplication
         MYSQL_WRITE_ROWS_EVENT = 2,
         MYSQL_UPDATE_ROWS_EVENT = 3,
         MYSQL_DELETE_ROWS_EVENT = 4,
+        MYSQL_UNPARSED_ROWS_EVENT = 100,
     };

     class ReplicationError : public DB::Exception
@@ -274,6 +275,8 @@ namespace MySQLReplication
         String status;
         String schema;
         String query;
+        String query_database_name;
+        String query_table_name;
         QueryType typ = QUERY_EVENT_DDL;
         bool transaction_complete = true;

@@ -446,7 +449,6 @@ namespace MySQLReplication
         void parseImpl(ReadBuffer & payload) override;
         void parseRow(ReadBuffer & payload, Bitmap & bitmap);

-    private:
         std::shared_ptr<TableMapEvent> table_map;
     };

@@ -497,17 +499,38 @@ namespace MySQLReplication
         void parseImpl(ReadBuffer & payload) override;
     };
+
+    class UnparsedRowsEvent : public RowsEvent
+    {
+    public:
+        UnparsedRowsEvent(const std::shared_ptr<TableMapEvent> & table_map_, EventHeader && header_, const RowsEventHeader & rows_header)
+            : RowsEvent(table_map_, std::move(header_), rows_header)
+        {
+        }
+
+        void dump(WriteBuffer & out) const override;
+        MySQLEventType type() const override { return MYSQL_UNPARSED_ROWS_EVENT; }
+        std::shared_ptr<RowsEvent> parse();
+
+    protected:
+        void parseImpl(ReadBuffer & payload) override;
+        std::vector<uint8_t> unparsed_data;
+        std::shared_ptr<RowsEvent> parsed_event;
+        mutable std::mutex mutex;
+    };

     class Position
     {
     public:
         UInt64 binlog_pos;
         String binlog_name;
         GTIDSets gtid_sets;
+        UInt32 timestamp;

-        Position() : binlog_pos(0) { }
+        Position() : binlog_pos(0), timestamp(0) { }
         void update(BinlogEventPtr event);
-        void update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_);
+        void update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_, UInt32 binlog_time_);
         void dump(WriteBuffer & out) const;
+        void resetPendingGTID() { pending_gtid.reset(); }

     private:
         std::optional<GTID> pending_gtid;
src/Core/MySQL/tests/gtest_MySQLGtid.cpp (new file, 40 lines)
@@ -0,0 +1,40 @@
+#include <gtest/gtest.h>
+#include <Core/MySQL/MySQLGtid.h>
+
+using namespace DB;
+
+
+GTEST_TEST(GTIDSetsContains, Tests)
+{
+    GTIDSets gtid_set,
+        contained1, contained2, contained3, contained4, contained5,
+        not_contained1, not_contained2, not_contained3, not_contained4, not_contained5, not_contained6;
+
+    gtid_set.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60");
+    contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60");
+    contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:2-3:11:47-49");
+    contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:11");
+    contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:47-49:60");
+    contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:60");
+
+    not_contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-50, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60");
+    not_contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:0-3:11:47-49");
+    not_contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:99");
+    not_contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:46-49:60");
+    not_contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:99");
+    not_contained6.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60, 00000000-0000-0000-0000-000000000000");
+
+
+    ASSERT_TRUE(gtid_set.contains(contained1));
+    ASSERT_TRUE(gtid_set.contains(contained2));
+    ASSERT_TRUE(gtid_set.contains(contained3));
+    ASSERT_TRUE(gtid_set.contains(contained4));
+    ASSERT_TRUE(gtid_set.contains(contained5));
+
+    ASSERT_FALSE(gtid_set.contains(not_contained1));
+    ASSERT_FALSE(gtid_set.contains(not_contained2));
+    ASSERT_FALSE(gtid_set.contains(not_contained3));
+    ASSERT_FALSE(gtid_set.contains(not_contained4));
+    ASSERT_FALSE(gtid_set.contains(not_contained5));
+    ASSERT_FALSE(gtid_set.contains(not_contained6));
+}
@@ -204,6 +204,8 @@ class IColumn;
     M(Bool, input_format_parallel_parsing, true, "Enable parallel parsing for some data formats.", 0) \
     M(UInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \
     M(Bool, output_format_parallel_formatting, true, "Enable parallel formatting for some data formats.", 0) \
+    M(UInt64, output_format_compression_level, 3, "Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when inserting to table function `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`.", 0) \
+    M(UInt64, output_format_compression_zstd_window_log, 0, "Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression.", 0) \
     \
     M(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized.", 0) \
     M(UInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized.", 0) \
@@ -352,6 +354,7 @@ class IColumn;
     M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \
     M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \
     M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \
+    M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \
     M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
     M(UInt64, query_profiler_cpu_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
     M(Bool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \
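For context, a sketch of how the two new output-compression settings could be used (not part of this commit): per their descriptions they apply when query output is compressed, e.g. with `INTO OUTFILE`; the file name below is hypothetical, and the compression method is assumed to be inferred from the `.zst` extension.

```sql
-- Hypothetical example of the new output compression settings.
SELECT *
FROM system.numbers
LIMIT 1000000
INTO OUTFILE 'numbers.tsv.zst'
SETTINGS output_format_compression_level = 5,
         output_format_compression_zstd_window_log = 27;
```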
@@ -227,11 +227,17 @@ LoadTaskPtr DatabaseOrdinary::startupDatabaseAsync(
     LoadJobSet startup_after,
     LoadingStrictnessLevel /*mode*/)
 {
-    // NOTE: this task is empty, but it is required for correct dependency handling (startup should be done after tables loading)
     auto job = makeLoadJob(
         std::move(startup_after),
         TablesLoaderBackgroundStartupPoolId,
-        fmt::format("startup Ordinary database {}", getDatabaseName()));
+        fmt::format("startup Ordinary database {}", getDatabaseName()),
+        ignoreDependencyFailure,
+        [] (AsyncLoader &, const LoadJobPtr &)
+        {
+            // NOTE: this job is no-op, but it is required for correct dependency handling
+            // 1) startup should be done after tables loading
+            // 2) load or startup errors for tables should not lead to not starting up the whole database
+        });
     return startup_database_task = makeLoadTask(async_loader, {job});
 }

@@ -10,6 +10,7 @@
 # include <Databases/DatabaseFactory.h>
 # include <Databases/MySQL/DatabaseMaterializedTablesIterator.h>
 # include <Databases/MySQL/MaterializedMySQLSyncThread.h>
+# include <Databases/MySQL/MySQLBinlogClientFactory.h>
 # include <Parsers/ASTCreateQuery.h>
 # include <Parsers/ASTFunction.h>
 # include <Parsers/queryToString.h>
@@ -39,10 +40,11 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL(
     const String & mysql_database_name_,
     mysqlxx::Pool && pool_,
     MySQLClient && client_,
+    const MySQLReplication::BinlogClientPtr & binlog_client_,
     std::unique_ptr<MaterializedMySQLSettings> settings_)
     : DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL(" + database_name_ + ")", context_)
     , settings(std::move(settings_))
-    , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get())
+    , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), binlog_client_, settings.get())
 {
 }

@@ -197,6 +199,7 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)

         if (!engine->arguments)
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);
+        MySQLReplication::BinlogClientPtr binlog_client;
         StorageMySQL::Configuration configuration;
         ASTs & arguments = engine->arguments->children;
         auto mysql_settings = std::make_unique<MySQLSettings>();
@@ -241,6 +244,12 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
         if (engine_define->settings)
             materialize_mode_settings->loadFromQuery(*engine_define);

+        if (materialize_mode_settings->use_binlog_client)
+            binlog_client = DB::MySQLReplication::BinlogClientFactory::instance().getClient(
+                configuration.host, configuration.port, configuration.username, configuration.password,
+                materialize_mode_settings->max_bytes_in_binlog_dispatcher_buffer,
+                materialize_mode_settings->max_flush_milliseconds_in_binlog_dispatcher);
+
         if (args.uuid == UUIDHelpers::Nil)
         {
             auto print_create_ast = args.create_query.clone();
@@ -261,6 +270,7 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
             configuration.database,
             std::move(mysql_pool),
             std::move(client),
+            binlog_client,
             std::move(materialize_mode_settings));
     };
     factory.registerDatabase("MaterializeMySQL", create_fn);
@@ -9,6 +9,7 @@
 #include <base/UUID.h>
 #include <Databases/IDatabase.h>
 #include <Databases/DatabaseAtomic.h>
+#include <Databases/MySQL/MySQLBinlogClient.h>
 #include <Databases/MySQL/MaterializedMySQLSettings.h>
 #include <Databases/MySQL/MaterializedMySQLSyncThread.h>
 #include <Common/logger_useful.h>
@@ -31,6 +32,7 @@ public:
         const String & mysql_database_name_,
         mysqlxx::Pool && pool_,
         MySQLClient && client_,
+        const MySQLReplication::BinlogClientPtr & binlog_client_,
         std::unique_ptr<MaterializedMySQLSettings> settings_);

     void rethrowExceptionIfNeeded() const;
@@ -17,6 +17,11 @@ class ASTStorage;
     M(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \
     M(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \
     M(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated.", 0) \
+    M(Bool, use_binlog_client, false, "Use MySQL Binlog Client.", 0) \
+    M(UInt64, max_bytes_in_binlog_queue, 64 * 1024 * 1024, "Max bytes in binlog's queue created from MySQL Binlog Client.", 0) \
+    M(UInt64, max_milliseconds_to_wait_in_binlog_queue, 10000, "Max milliseconds to wait when max bytes exceeded in a binlog queue.", 0) \
+    M(UInt64, max_bytes_in_binlog_dispatcher_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes in the binlog dispatcher's buffer before it is flushed to attached binlogs.", 0) \
+    M(UInt64, max_flush_milliseconds_in_binlog_dispatcher, 1000, "Max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlogs.", 0) \

 DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS)

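For context, a sketch of how the new binlog-client settings above might be supplied when creating a MaterializedMySQL database (not part of this commit): host, port, credentials, and database names are placeholders, and the database engine itself is experimental, so the corresponding `allow_experimental_*` setting may need to be enabled first.

```sql
-- Hypothetical example; connection parameters are placeholders.
CREATE DATABASE mysql_replica
ENGINE = MaterializedMySQL('mysql-host:3306', 'source_db', 'repl_user', 'secret')
SETTINGS use_binlog_client = 1,
         max_bytes_in_binlog_queue = 67108864,
         max_milliseconds_to_wait_in_binlog_queue = 10000;
```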
@@ -26,14 +26,13 @@
 #include <Interpreters/executeQuery.h>
 #include <Storages/StorageMergeTree.h>
 #include <Common/quoteString.h>
-#include <Common/randomSeed.h>
+#include <Common/randomNumber.h>
 #include <Common/setThreadName.h>
 #include <base/sleep.h>
 #include <boost/algorithm/string/split.hpp>
 #include <boost/algorithm/string/trim.hpp>
 #include <Parsers/CommonParsers.h>
 #include <Parsers/ASTIdentifier.h>
-#include <pcg_random.hpp>

 namespace DB
 {
@@ -48,8 +47,43 @@ namespace ErrorCodes
     extern const int UNKNOWN_DATABASE;
     extern const int UNKNOWN_EXCEPTION;
     extern const int CANNOT_READ_ALL_DATA;
+    extern const int ATTEMPT_TO_READ_AFTER_EOF;
+    extern const int NETWORK_ERROR;
+    extern const int UNKNOWN_TABLE;
+    extern const int CANNOT_GET_CREATE_TABLE_QUERY;
+    extern const int THERE_IS_NO_QUERY;
+    extern const int QUERY_WAS_CANCELLED;
+    extern const int TABLE_ALREADY_EXISTS;
+    extern const int DATABASE_ALREADY_EXISTS;
+    extern const int DATABASE_NOT_EMPTY;
+    extern const int TABLE_IS_DROPPED;
+    extern const int TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT;
+    extern const int CANNOT_CREATE_CHARSET_CONVERTER;
+    extern const int UNKNOWN_FUNCTION;
+    extern const int UNKNOWN_IDENTIFIER;
+    extern const int UNKNOWN_TYPE;
+    extern const int TIMEOUT_EXCEEDED;
+    extern const int MEMORY_LIMIT_EXCEEDED;
+    extern const int MYSQL_SYNTAX_ERROR;
 }
+
+// USE MySQL ERROR CODE:
+// https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html
+constexpr int ER_ACCESS_DENIED_ERROR = 1045; /// NOLINT
+constexpr int ER_DBACCESS_DENIED_ERROR = 1044; /// NOLINT
+constexpr int ER_BAD_DB_ERROR = 1049; /// NOLINT
+constexpr int ER_MASTER_HAS_PURGED_REQUIRED_GTIDS = 1789; /// NOLINT
+constexpr int ER_MASTER_FATAL_ERROR_READING_BINLOG = 1236; /// NOLINT
+
+// https://dev.mysql.com/doc/mysql-errors/8.0/en/client-error-reference.html
+constexpr int CR_CONN_HOST_ERROR = 2003; /// NOLINT
+constexpr int CR_SERVER_GONE_ERROR = 2006; /// NOLINT
+constexpr int CR_SERVER_LOST = 2013; /// NOLINT
+constexpr int ER_SERVER_SHUTDOWN = 1053; /// NOLINT
+constexpr int ER_LOCK_DEADLOCK = 1213; /// NOLINT
+constexpr int ER_LOCK_WAIT_TIMEOUT = 1205; /// NOLINT
+constexpr int ER_OPTION_PREVENTS_STATEMENT = 1290; /// NOLINT
+
 static constexpr auto MYSQL_BACKGROUND_THREAD_NAME = "MySQLDBSync";

 static ContextMutablePtr createQueryContext(ContextPtr context)
@@ -157,12 +191,68 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
     }
 }
+
+static bool shouldReconnectOnException(const std::exception_ptr & e)
+{
+    try
+    {
+        std::rethrow_exception(e);
+    }
+    catch (const mysqlxx::ConnectionFailed &) {} /// NOLINT
+    catch (const mysqlxx::ConnectionLost &) {} /// NOLINT
+    catch (const Poco::Net::ConnectionResetException &) {} /// NOLINT
+    catch (const Poco::Net::ConnectionRefusedException &) {} /// NOLINT
+    catch (const DB::NetException &) {} /// NOLINT
+    catch (const Poco::Net::NetException & e)
+    {
+        if (e.code() != POCO_ENETDOWN &&
+            e.code() != POCO_ENETUNREACH &&
+            e.code() != POCO_ENETRESET &&
+            e.code() != POCO_ESYSNOTREADY)
+            return false;
+    }
+    catch (const mysqlxx::BadQuery & e)
+    {
+        // Lost connection to MySQL server during query
+        if (e.code() != CR_SERVER_LOST &&
+            e.code() != ER_SERVER_SHUTDOWN &&
+            e.code() != CR_SERVER_GONE_ERROR &&
+            e.code() != CR_CONN_HOST_ERROR &&
+            e.code() != ER_LOCK_DEADLOCK &&
+            e.code() != ER_LOCK_WAIT_TIMEOUT &&
+            e.code() != ER_OPTION_PREVENTS_STATEMENT)
+            return false;
+    }
+    catch (const mysqlxx::Exception & e)
+    {
+        // ER_SERVER_SHUTDOWN is thrown in different types under different conditions.
+        // E.g. checkError() in Common/mysqlxx/Exception.cpp will throw mysqlxx::Exception.
+        if (e.code() != CR_SERVER_LOST && e.code() != ER_SERVER_SHUTDOWN && e.code() != CR_SERVER_GONE_ERROR && e.code() != CR_CONN_HOST_ERROR)
+            return false;
+    }
+    catch (const Poco::Exception & e)
+    {
+        if (e.code() != ErrorCodes::NETWORK_ERROR &&
+            e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED &&
+            e.code() != ErrorCodes::UNKNOWN_TABLE && // Since we have ignored the DDL exception when the tables without primary key, insert into those tables will get UNKNOWN_TABLE.
+            e.code() != ErrorCodes::CANNOT_READ_ALL_DATA &&
+            e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF &&
+            e.code() != ErrorCodes::TIMEOUT_EXCEEDED)
+            return false;
+    }
+    catch (...)
+    {
+        return false;
+    }
+    return true;
+}
+
 MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
     ContextPtr context_,
     const String & database_name_,
     const String & mysql_database_name_,
     mysqlxx::Pool && pool_,
     MySQLClient && client_,
+    const MySQLReplication::BinlogClientPtr & binlog_client_,
     MaterializedMySQLSettings * settings_)
     : WithContext(context_->getGlobalContext())
     , log(&Poco::Logger::get("MaterializedMySQLSyncThread"))
||||||
@ -170,6 +260,7 @@ MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
|
|||||||
, mysql_database_name(mysql_database_name_)
|
, mysql_database_name(mysql_database_name_)
|
||||||
, pool(std::move(pool_)) /// NOLINT
|
, pool(std::move(pool_)) /// NOLINT
|
||||||
, client(std::move(client_))
|
, client(std::move(client_))
|
||||||
|
, binlog_client(binlog_client_)
|
||||||
, settings(settings_)
|
, settings(settings_)
|
||||||
{
|
{
|
||||||
query_prefix = "EXTERNAL DDL FROM MySQL(" + backQuoteIfNeed(database_name) + ", " + backQuoteIfNeed(mysql_database_name) + ") ";
|
query_prefix = "EXTERNAL DDL FROM MySQL(" + backQuoteIfNeed(database_name) + ", " + backQuoteIfNeed(mysql_database_name) + ") ";
|
||||||
@ -216,14 +307,23 @@ void MaterializedMySQLSyncThread::synchronization()
|
|||||||
UInt64 elapsed_ms = watch.elapsedMilliseconds();
|
UInt64 elapsed_ms = watch.elapsedMilliseconds();
|
||||||
if (elapsed_ms < max_flush_time)
|
if (elapsed_ms < max_flush_time)
|
||||||
{
|
{
|
||||||
BinlogEventPtr binlog_event = client.readOneBinlogEvent(max_flush_time - elapsed_ms);
|
const auto timeout_ms = max_flush_time - elapsed_ms;
|
||||||
if (binlog_event)
|
BinlogEventPtr binlog_event;
|
||||||
|
if (binlog)
|
||||||
|
binlog->tryReadEvent(binlog_event, timeout_ms);
|
||||||
|
else
|
||||||
|
binlog_event = client.readOneBinlogEvent(timeout_ms);
|
||||||
|
if (binlog_event && !ignoreEvent(binlog_event))
|
||||||
onEvent(buffers, binlog_event, metadata);
|
onEvent(buffers, binlog_event, metadata);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const Exception & e)
|
catch (const Exception & e)
|
||||||
{
|
{
|
||||||
if (e.code() != ErrorCodes::CANNOT_READ_ALL_DATA || settings->max_wait_time_when_mysql_unavailable < 0)
|
if (settings->max_wait_time_when_mysql_unavailable < 0)
|
||||||
|
throw;
|
||||||
|
bool binlog_was_purged = e.code() == ER_MASTER_FATAL_ERROR_READING_BINLOG ||
|
||||||
|
e.code() == ER_MASTER_HAS_PURGED_REQUIRED_GTIDS;
|
||||||
|
if (!binlog_was_purged && !shouldReconnectOnException(std::current_exception()))
|
||||||
throw;
|
throw;
|
||||||
|
|
||||||
flushBuffersData(buffers, metadata);
|
flushBuffersData(buffers, metadata);
|
||||||
@ -246,6 +346,7 @@ void MaterializedMySQLSyncThread::synchronization()
|
|||||||
catch (...)
|
catch (...)
|
||||||
{
|
{
|
||||||
client.disconnect();
|
client.disconnect();
|
||||||
|
binlog = nullptr;
|
||||||
tryLogCurrentException(log);
|
tryLogCurrentException(log);
|
||||||
setSynchronizationThreadException(std::current_exception());
|
setSynchronizationThreadException(std::current_exception());
|
||||||
}
|
}
|
||||||
@ -259,6 +360,7 @@ void MaterializedMySQLSyncThread::stopSynchronization()
|
|||||||
if (background_thread_pool->joinable())
|
if (background_thread_pool->joinable())
|
||||||
background_thread_pool->join();
|
background_thread_pool->join();
|
||||||
client.disconnect();
|
client.disconnect();
|
||||||
|
binlog = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -428,14 +530,6 @@ static inline void dumpDataForTables(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline UInt32 randomNumber()
|
|
||||||
{
|
|
||||||
pcg64_fast rng{randomSeed()};
|
|
||||||
std::uniform_int_distribution<pcg64_fast::result_type> dist6(
|
|
||||||
std::numeric_limits<UInt32>::min(), std::numeric_limits<UInt32>::max());
|
|
||||||
return static_cast<UInt32>(dist6(rng));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata)
|
bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata)
|
||||||
{
|
{
|
||||||
bool opened_transaction = false;
|
bool opened_transaction = false;
|
||||||
@ -463,7 +557,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
|||||||
if (!need_dumping_tables.empty())
|
if (!need_dumping_tables.empty())
|
||||||
{
|
{
|
||||||
Position position;
|
Position position;
|
||||||
position.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set);
|
position.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set, 0);
|
||||||
|
|
||||||
metadata.transaction(position, [&]()
|
metadata.transaction(position, [&]()
|
||||||
{
|
{
|
||||||
@ -487,8 +581,20 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
|||||||
if (opened_transaction)
|
if (opened_transaction)
|
||||||
connection->query("COMMIT").execute();
|
connection->query("COMMIT").execute();
|
||||||
|
|
||||||
client.connect();
|
if (binlog_client)
|
||||||
client.startBinlogDumpGTID(randomNumber(), mysql_database_name, materialized_tables_list, metadata.executed_gtid_set, metadata.binlog_checksum);
|
{
|
||||||
|
binlog_client->setBinlogChecksum(metadata.binlog_checksum);
|
||||||
|
binlog = binlog_client->createBinlog(metadata.executed_gtid_set,
|
||||||
|
database_name,
|
||||||
|
{mysql_database_name},
|
||||||
|
settings->max_bytes_in_binlog_queue,
|
||||||
|
settings->max_milliseconds_to_wait_in_binlog_queue);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
client.connect();
|
||||||
|
client.startBinlogDumpGTID(randomNumber(), mysql_database_name, materialized_tables_list, metadata.executed_gtid_set, metadata.binlog_checksum);
|
||||||
|
}
|
||||||
|
|
||||||
setSynchronizationThreadException(nullptr);
|
setSynchronizationThreadException(nullptr);
|
||||||
return true;
|
return true;
|
||||||
@ -500,20 +606,11 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
|||||||
if (opened_transaction)
|
if (opened_transaction)
|
||||||
connection->query("ROLLBACK").execute();
|
connection->query("ROLLBACK").execute();
|
||||||
|
|
||||||
try
|
if (settings->max_wait_time_when_mysql_unavailable < 0)
|
||||||
{
|
throw;
|
||||||
|
|
||||||
|
if (!shouldReconnectOnException(std::current_exception()))
|
||||||
throw;
|
throw;
|
||||||
}
|
|
||||||
catch (const mysqlxx::ConnectionFailed & ex)
|
|
||||||
{
|
|
||||||
LOG_TRACE(log, "Connection to MySQL failed {}", ex.displayText());
|
|
||||||
}
|
|
||||||
catch (const mysqlxx::BadQuery & e)
|
|
||||||
{
|
|
||||||
// Lost connection to MySQL server during query
|
|
||||||
if (e.code() != CR_SERVER_LOST || settings->max_wait_time_when_mysql_unavailable < 0)
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
|
|
||||||
setSynchronizationThreadException(std::current_exception());
|
setSynchronizationThreadException(std::current_exception());
|
||||||
/// Avoid busy loop when MySQL is not available.
|
/// Avoid busy loop when MySQL is not available.
|
||||||
@ -524,17 +621,55 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MaterializedMySQLSyncThread::isTableIgnored(const String & table_name) const
|
||||||
|
{
|
||||||
|
return !materialized_tables_list.empty() && !materialized_tables_list.contains(table_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MaterializedMySQLSyncThread::ignoreEvent(const BinlogEventPtr & event) const
|
||||||
|
{
|
||||||
|
switch (event->type())
|
||||||
|
{
|
||||||
|
case MYSQL_WRITE_ROWS_EVENT:
|
||||||
|
case MYSQL_DELETE_ROWS_EVENT:
|
||||||
|
case MYSQL_UPDATE_ROWS_EVENT:
|
||||||
|
case MYSQL_UNPARSED_ROWS_EVENT:
|
||||||
|
{
|
||||||
|
auto table_name = static_cast<RowsEvent &>(*event).table;
|
||||||
|
if (!table_name.empty() && isTableIgnored(table_name))
|
||||||
|
{
|
||||||
|
switch (event->header.type)
|
||||||
|
{
|
||||||
|
case WRITE_ROWS_EVENT_V1:
|
||||||
|
case WRITE_ROWS_EVENT_V2:
|
||||||
|
case DELETE_ROWS_EVENT_V1:
|
||||||
|
case DELETE_ROWS_EVENT_V2:
|
||||||
|
case UPDATE_ROWS_EVENT_V1:
|
||||||
|
case UPDATE_ROWS_EVENT_V2:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown event type: {}", magic_enum::enum_name(event->header.type));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void MaterializedMySQLSyncThread::flushBuffersData(Buffers & buffers, MaterializeMetadata & metadata)
|
void MaterializedMySQLSyncThread::flushBuffersData(Buffers & buffers, MaterializeMetadata & metadata)
|
||||||
{
|
{
|
||||||
if (buffers.data.empty())
|
if (buffers.data.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
metadata.transaction(client.getPosition(), [&]() { buffers.commit(getContext()); });
|
metadata.transaction(getPosition(), [&]() { buffers.commit(getContext()); });
|
||||||
|
|
||||||
const auto & position_message = [&]()
|
const auto & position_message = [&]()
|
||||||
{
|
{
|
||||||
WriteBufferFromOwnString buf;
|
WriteBufferFromOwnString buf;
|
||||||
client.getPosition().dump(buf);
|
getPosition().dump(buf);
|
||||||
return buf.str();
|
return buf.str();
|
||||||
};
|
};
|
||||||
LOG_INFO(log, "MySQL executed position: \n {}", position_message());
|
LOG_INFO(log, "MySQL executed position: \n {}", position_message());
|
||||||
@ -783,10 +918,33 @@ void MaterializedMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPt
|
|||||||
else if (receive_event->type() == MYSQL_QUERY_EVENT)
|
else if (receive_event->type() == MYSQL_QUERY_EVENT)
|
||||||
{
|
{
|
||||||
QueryEvent & query_event = static_cast<QueryEvent &>(*receive_event);
|
QueryEvent & query_event = static_cast<QueryEvent &>(*receive_event);
|
||||||
|
/// Skip events for different databases if any
|
||||||
|
if (!query_event.query_database_name.empty() && query_event.query_database_name != mysql_database_name)
|
||||||
|
{
|
||||||
|
LOG_WARNING(
|
||||||
|
log,
|
||||||
|
"Skipped QueryEvent, current mysql database name: {}, ddl schema: {}, query: {}",
|
||||||
|
mysql_database_name,
|
||||||
|
query_event.query_database_name,
|
||||||
|
query_event.query);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!query_event.query_table_name.empty() && isTableIgnored(query_event.query_table_name))
|
||||||
|
{
|
||||||
|
LOG_WARNING(log, "Due to the table filter rules, query_event on {} is ignored.", database_name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
Position position_before_ddl;
|
Position position_before_ddl;
|
||||||
position_before_ddl.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set);
|
position_before_ddl.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set, query_event.header.timestamp);
|
||||||
metadata.transaction(position_before_ddl, [&]() { buffers.commit(getContext()); });
|
metadata.transaction(position_before_ddl, [&]() { buffers.commit(getContext()); });
|
||||||
metadata.transaction(client.getPosition(),[&](){ executeDDLAtomic(query_event); });
|
metadata.transaction(getPosition(),[&]() { executeDDLAtomic(query_event); });
|
||||||
|
}
|
||||||
|
else if (receive_event->type() == MYSQL_UNPARSED_ROWS_EVENT)
|
||||||
|
{
|
||||||
|
UnparsedRowsEvent & unparsed_event = static_cast<UnparsedRowsEvent &>(*receive_event);
|
||||||
|
auto nested_event = unparsed_event.parse();
|
||||||
|
onEvent(buffers, nested_event, metadata);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -796,7 +954,10 @@ void MaterializedMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPt
|
|||||||
/// Some behaviors(such as changing the value of "binlog_checksum") rotate the binlog file.
|
/// Some behaviors(such as changing the value of "binlog_checksum") rotate the binlog file.
|
||||||
/// To ensure that the synchronization continues, we need to handle these events
|
/// To ensure that the synchronization continues, we need to handle these events
|
||||||
metadata.fetchMasterVariablesValue(pool.get(/* wait_timeout= */ UINT64_MAX));
|
metadata.fetchMasterVariablesValue(pool.get(/* wait_timeout= */ UINT64_MAX));
|
||||||
client.setBinlogChecksum(metadata.binlog_checksum);
|
if (binlog_client)
|
||||||
|
binlog_client->setBinlogChecksum(metadata.binlog_checksum);
|
||||||
|
else
|
||||||
|
client.setBinlogChecksum(metadata.binlog_checksum);
|
||||||
}
|
}
|
||||||
else if (receive_event->header.type != HEARTBEAT_EVENT)
|
else if (receive_event->header.type != HEARTBEAT_EVENT)
|
||||||
{
|
{
|
||||||
@ -827,7 +988,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
|
|||||||
auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
|
auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
|
||||||
if (!table_id.table_name.empty())
|
if (!table_id.table_name.empty())
|
||||||
{
|
{
|
||||||
if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name))
|
if (table_id.database_name != mysql_database_name || isTableIgnored(table_id.table_name))
|
||||||
{
|
{
|
||||||
LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
|
LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
|
||||||
return;
|
return;
|
||||||
@ -845,8 +1006,28 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
|
|||||||
tryLogCurrentException(log);
|
tryLogCurrentException(log);
|
||||||
|
|
||||||
/// If some DDL query was not successfully parsed and executed
|
/// If some DDL query was not successfully parsed and executed
|
||||||
/// Then replication may fail on next binlog events anyway
|
/// Then replication may fail on next binlog events anyway.
|
||||||
if (exception.code() != ErrorCodes::SYNTAX_ERROR)
|
/// We can skip the error binlog evetns and continue to execute the right ones.
|
||||||
|
/// eg. The user creates a table without primary key and finds it is wrong, then
|
||||||
|
/// drops it and creates a new right one. We guarantee the right one can be executed.
|
||||||
|
|
||||||
|
if (exception.code() != ErrorCodes::SYNTAX_ERROR &&
|
||||||
|
exception.code() != ErrorCodes::MYSQL_SYNTAX_ERROR &&
|
||||||
|
exception.code() != ErrorCodes::NOT_IMPLEMENTED &&
|
||||||
|
exception.code() != ErrorCodes::UNKNOWN_TABLE &&
|
||||||
|
exception.code() != ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY &&
|
||||||
|
exception.code() != ErrorCodes::THERE_IS_NO_QUERY &&
|
||||||
|
exception.code() != ErrorCodes::QUERY_WAS_CANCELLED &&
|
||||||
|
exception.code() != ErrorCodes::TABLE_ALREADY_EXISTS &&
|
||||||
|
exception.code() != ErrorCodes::UNKNOWN_DATABASE &&
|
||||||
|
exception.code() != ErrorCodes::DATABASE_ALREADY_EXISTS &&
|
||||||
|
exception.code() != ErrorCodes::DATABASE_NOT_EMPTY &&
|
||||||
|
exception.code() != ErrorCodes::TABLE_IS_DROPPED &&
|
||||||
|
exception.code() != ErrorCodes::TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT &&
|
||||||
|
exception.code() != ErrorCodes::CANNOT_CREATE_CHARSET_CONVERTER &&
|
||||||
|
exception.code() != ErrorCodes::UNKNOWN_FUNCTION &&
|
||||||
|
exception.code() != ErrorCodes::UNKNOWN_IDENTIFIER &&
|
||||||
|
exception.code() != ErrorCodes::UNKNOWN_TYPE)
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
src/Databases/MySQL/MaterializedMySQLSyncThread.h
@@ -11,6 +11,7 @@
 # include <DataTypes/DataTypesNumber.h>
 # include <Databases/DatabaseOrdinary.h>
 # include <Databases/IDatabase.h>
+# include <Databases/MySQL/MySQLBinlogClient.h>
 # include <Databases/MySQL/MaterializeMetadata.h>
 # include <Databases/MySQL/MaterializedMySQLSettings.h>
 # include <Parsers/ASTCreateQuery.h>
@@ -45,6 +46,7 @@ public:
         const String & mysql_database_name_,
         mysqlxx::Pool && pool_,
         MySQLClient && client_,
+        const MySQLReplication::BinlogClientPtr & binlog_client_,
         MaterializedMySQLSettings * settings_);

     void stopSynchronization();
@@ -61,19 +63,12 @@ private:

     mutable mysqlxx::Pool pool;
     mutable MySQLClient client;
+    BinlogClientPtr binlog_client;
+    BinlogPtr binlog;
     MaterializedMySQLSettings * settings;
     String query_prefix;
     NameSet materialized_tables_list;

-    // USE MySQL ERROR CODE:
-    // https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html
-    const int ER_ACCESS_DENIED_ERROR = 1045; /// NOLINT
-    const int ER_DBACCESS_DENIED_ERROR = 1044; /// NOLINT
-    const int ER_BAD_DB_ERROR = 1049; /// NOLINT
-
-    // https://dev.mysql.com/doc/mysql-errors/8.0/en/client-error-reference.html
-    const int CR_SERVER_LOST = 2013; /// NOLINT
-
     struct Buffers
     {
         String database;
@@ -99,12 +94,16 @@ private:
         BufferAndSortingColumnsPtr getTableDataBuffer(const String & table, ContextPtr context);
     };

+    Position getPosition() const { return binlog ? binlog->getPosition() : client.getPosition(); }
     void synchronization();

     bool isCancelled() { return sync_quit.load(std::memory_order_relaxed); }

     bool prepareSynchronized(MaterializeMetadata & metadata);

+    bool isTableIgnored(const String & table_name) const;
+    bool ignoreEvent(const BinlogEventPtr & event) const;
+
     void flushBuffersData(Buffers & buffers, MaterializeMetadata & metadata);

     void onEvent(Buffers & buffers, const MySQLReplication::BinlogEventPtr & event, MaterializeMetadata & metadata);
src/Databases/MySQL/MySQLBinlog.cpp (new file, 500 lines)

#include "MySQLBinlog.h"
#include <Core/MySQL/Authentication.h>
#include <Core/MySQL/PacketsGeneric.h>
#include <Core/MySQL/PacketsConnection.h>
#include <Core/MySQL/PacketsProtocolText.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <Databases/MySQL/tryConvertStringLiterals.h>
#include <Common/DNSResolver.h>
#include <Common/randomNumber.h>
#include <Poco/String.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/LimitReadBuffer.h>
#include <IO/MySQLBinlogEventReadBuffer.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/Operators.h>

#include <random>

namespace DB
{
using namespace Replication;
using namespace Authentication;
using namespace ConnectionPhase;

namespace ErrorCodes
{
    extern const int UNKNOWN_EXCEPTION;
    extern const int UNKNOWN_PACKET_FROM_SERVER;
    extern const int ATTEMPT_TO_READ_AFTER_EOF;
    extern const int CANNOT_READ_ALL_DATA;
    extern const int LOGICAL_ERROR;
    extern const int NETWORK_ERROR;
}

namespace MySQLReplication
{

class WriteCommand : public IMySQLWritePacket
{
public:
    const char command;
    const String query;

    WriteCommand(char command_, const String & query_) : command(command_), query(query_) { }

    size_t getPayloadSize() const override { return 1 + query.size(); }

    void writePayloadImpl(WriteBuffer & buffer) const override
    {
        buffer.write(command);
        if (!query.empty())
            buffer.write(query.data(), query.size());
    }
};

IBinlog::Checksum IBinlog::checksumFromString(const String & checksum)
{
    auto str = Poco::toUpper(checksum);
    if (str == "CRC32")
        return IBinlog::CRC32;
    if (str != "NONE")
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown checksum: {}", checksum);
    return IBinlog::NONE;
}

void BinlogParser::setChecksum(Checksum checksum)
{
    switch (checksum)
    {
        case Checksum::CRC32:
            checksum_signature_length = 4;
            break;
        case Checksum::NONE:
            checksum_signature_length = 0;
            break;
    }
}

void BinlogParser::parseEvent(EventHeader & event_header, ReadBuffer & event_payload)
{
    switch (event_header.type)
    {
        case FORMAT_DESCRIPTION_EVENT:
        {
            event = std::make_shared<FormatDescriptionEvent>(EventHeader(event_header));
            event->parseEvent(event_payload);
            break;
        }
        case ROTATE_EVENT:
        {
            event = std::make_shared<RotateEvent>(EventHeader(event_header));
            event->parseEvent(event_payload);
            break;
        }
        case QUERY_EVENT:
        {
            event = std::make_shared<QueryEvent>(EventHeader(event_header));
            event->parseEvent(event_payload);

            auto query = std::static_pointer_cast<QueryEvent>(event);
            switch (query->typ)
            {
                case QUERY_EVENT_MULTI_TXN_FLAG:
                case QUERY_EVENT_XA:
                case QUERY_SAVEPOINT:
                {
                    event = std::make_shared<DryRunEvent>(EventHeader(query->header));
                    break;
                }
                default:
                {
                    String quoted_query = query->query;
                    tryQuoteUnrecognizedTokens(quoted_query);
                    tryConvertStringLiterals(quoted_query);
                    auto table_id = tryParseTableIDFromDDL(query->query, query->schema);
                    query->query_database_name = table_id.database_name;
                    query->query_table_name = table_id.table_name;
                    break;
                }
            }
            break;
        }
        case XID_EVENT:
        {
            event = std::make_shared<XIDEvent>(EventHeader(event_header));
            event->parseEvent(event_payload);
            break;
        }
        case TABLE_MAP_EVENT:
        {
            TableMapEventHeader map_event_header;
            map_event_header.parse(event_payload);
            event = std::make_shared<TableMapEvent>(EventHeader(event_header), map_event_header, flavor_charset);
            try
            {
                event->parseEvent(event_payload);
                auto table_map = std::static_pointer_cast<TableMapEvent>(event);
                table_maps[table_map->table_id] = table_map;
            }
            catch (const Poco::Exception & exc)
            {
                /// Ignore parsing issues
                if (exc.code() != ErrorCodes::UNKNOWN_EXCEPTION)
                    throw;
                event = std::make_shared<DryRunEvent>(std::move(event_header));
                event->parseEvent(event_payload);
            }
            break;
        }
        case WRITE_ROWS_EVENT_V1:
        case WRITE_ROWS_EVENT_V2:
        case DELETE_ROWS_EVENT_V1:
        case DELETE_ROWS_EVENT_V2:
        case UPDATE_ROWS_EVENT_V1:
        case UPDATE_ROWS_EVENT_V2:
        {
            RowsEventHeader rows_header(event_header.type);
            rows_header.parse(event_payload);
            if (table_maps.contains(rows_header.table_id))
                event = std::make_shared<UnparsedRowsEvent>(table_maps.at(rows_header.table_id), EventHeader(event_header), rows_header);
            else
                event = std::make_shared<DryRunEvent>(std::move(event_header));
            event->parseEvent(event_payload);
            if (rows_header.flags & ROWS_END_OF_STATEMENT)
                table_maps.clear();
            break;
        }
        case GTID_EVENT:
        {
            event = std::make_shared<GTIDEvent>(EventHeader(event_header));
            event->parseEvent(event_payload);
            break;
        }
        default:
        {
            event = std::make_shared<DryRunEvent>(EventHeader(event_header));
            event->parseEvent(event_payload);
            break;
        }
    }
    updatePosition(event, position);
}

void BinlogParser::updatePosition(const BinlogEventPtr & event, Position & position)
{
    const UInt64 binlog_pos_prev = position.binlog_pos;
    position.binlog_pos = event->header.log_pos;
    if (event->header.timestamp > 0)
        position.timestamp = event->header.timestamp;

    switch (event->header.type)
    {
        case QUERY_EVENT:
            if (event->type() == MYSQL_UNHANDLED_EVENT)
                break;
            [[fallthrough]];
        case GTID_EVENT:
        case XID_EVENT:
        case ROTATE_EVENT:
            position.update(event);
            break;
        default:
            break;
    }

    if (event->header.type != ROTATE_EVENT)
    {
        /// UInt32 overflow when Pos > End_log_pos
        /// https://dev.mysql.com/doc/refman/8.0/en/show-binlog-events.html
        /// binlog_pos - The position at which the next event begins, which is equal to Pos plus the size of the event
        const UInt64 binlog_pos_correct = binlog_pos_prev + event->header.event_size;
        if (position.binlog_pos < binlog_pos_prev && binlog_pos_correct > std::numeric_limits<UInt32>::max())
            position.binlog_pos = binlog_pos_correct;
    }
}

bool BinlogParser::isNew(const Position & older, const Position & newer)
{
    if (older.gtid_sets.contains(newer.gtid_sets))
        return false;
    /// Check if all sets in newer position have the same UUID from older sets
    std::set<UUID> older_set;
    for (const auto & set : older.gtid_sets.sets)
        older_set.insert(set.uuid);
    for (const auto & set : newer.gtid_sets.sets)
    {
        if (!older_set.contains(set.uuid))
            return false;
    }
    return true;
}

void BinlogFromSocket::connect(const String & host, UInt16 port, const String & user, const String & password)
{
    if (connected)
        disconnect();

    const Poco::Timespan connection_timeout(10'000'000'000);
    const Poco::Timespan receive_timeout(5'000'000'000);
    const Poco::Timespan send_timeout(5'000'000'000);

    socket = std::make_unique<Poco::Net::StreamSocket>();
    address = DNSResolver::instance().resolveAddress(host, port);
    socket->connect(*address, connection_timeout);
    socket->setReceiveTimeout(receive_timeout);
    socket->setSendTimeout(send_timeout);
    socket->setNoDelay(true);
    connected = true;

    in = std::make_unique<ReadBufferFromPocoSocket>(*socket);
    out = std::make_unique<WriteBufferFromPocoSocket>(*socket);
    packet_endpoint = std::make_shared<MySQLProtocol::PacketEndpoint>(*in, *out, sequence_id);

    handshake(user, password);
}

void BinlogFromSocket::disconnect()
{
    in = nullptr;
    out = nullptr;
    if (socket)
        socket->close();
    socket = nullptr;
    connected = false;
    sequence_id = 0;

    GTIDSets sets;
    position.gtid_sets = sets;
    position.resetPendingGTID();
}

/// https://dev.mysql.com/doc/internals/en/connection-phase-packets.html
void BinlogFromSocket::handshake(const String & user, const String & password)
{
    const String mysql_native_password = "mysql_native_password";
    Handshake handshake;
    packet_endpoint->receivePacket(handshake);
    if (handshake.auth_plugin_name != mysql_native_password)
    {
        throw Exception(
            ErrorCodes::UNKNOWN_PACKET_FROM_SERVER,
            "Only support {} auth plugin name, but got {}",
            mysql_native_password,
            handshake.auth_plugin_name);
    }

    Native41 native41(password, handshake.auth_plugin_data);
    String auth_plugin_data = native41.getAuthPluginData();

    const UInt8 charset_utf8 = 33;
    HandshakeResponse handshake_response(
        client_capabilities, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password);
    packet_endpoint->sendPacket<HandshakeResponse>(handshake_response, true);

    ResponsePacket packet_response(client_capabilities, true);
    packet_endpoint->receivePacket(packet_response);
    packet_endpoint->resetSequenceId();

    if (packet_response.getType() == PACKET_ERR)
        throw Exception::createDeprecated(packet_response.err.error_message, ErrorCodes::UNKNOWN_PACKET_FROM_SERVER);
    else if (packet_response.getType() == PACKET_AUTH_SWITCH)
        throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Access denied for user {}", user);
}

void BinlogFromSocket::writeCommand(char command, const String & query)
{
    WriteCommand write_command(command, query);
    packet_endpoint->sendPacket<WriteCommand>(write_command, true);

    ResponsePacket packet_response(client_capabilities);
    packet_endpoint->receivePacket(packet_response);
    switch (packet_response.getType())
    {
        case PACKET_ERR:
            throw Exception::createDeprecated(packet_response.err.error_message, ErrorCodes::UNKNOWN_PACKET_FROM_SERVER);
        case PACKET_OK:
            break;
        default:
            break;
    }
    packet_endpoint->resetSequenceId();
}

void BinlogFromSocket::registerSlaveOnMaster(UInt32 slave_id)
{
    RegisterSlave register_slave(slave_id);
    packet_endpoint->sendPacket<RegisterSlave>(register_slave, true);

    ResponsePacket packet_response(client_capabilities);
    packet_endpoint->receivePacket(packet_response);
    packet_endpoint->resetSequenceId();
    if (packet_response.getType() == PACKET_ERR)
        throw Exception::createDeprecated(packet_response.err.error_message, ErrorCodes::UNKNOWN_PACKET_FROM_SERVER);
}

void BinlogFromSocket::start(UInt32 slave_id, const String & executed_gtid_set)
{
    if (!connected)
        return;

    /// Maybe CRC32 or NONE. mysqlbinlog.cc use NONE, see its below comments:
    /// Make a notice to the server that this client is checksum-aware.
    /// It does not need the first fake Rotate necessary checksummed.
    writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = 'CRC32'");

    /// Set heartbeat 1s
    const UInt64 period_ns = 1'000'000'000;
    writeCommand(Command::COM_QUERY, "SET @master_heartbeat_period = " + std::to_string(period_ns));

    /// Register slave.
    registerSlaveOnMaster(slave_id);

    position.gtid_sets = {};
    position.gtid_sets.parse(executed_gtid_set);

    BinlogDumpGTID binlog_dump(slave_id, position.gtid_sets.toPayload());
    packet_endpoint->sendPacket<BinlogDumpGTID>(binlog_dump, true);
}

class ReadPacketFromSocket : public IMySQLReadPacket
{
public:
    using ReadPayloadFunc = std::function<void(ReadBuffer & payload)>;
    explicit ReadPacketFromSocket(ReadPayloadFunc fn) : read_payload_func(std::move(fn)) { }
    void readPayloadImpl(ReadBuffer & payload) override;
    ReadPayloadFunc read_payload_func;
};

void ReadPacketFromSocket::readPayloadImpl(ReadBuffer & payload)
{
    if (payload.eof())
        throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF.");

    UInt8 header = static_cast<unsigned char>(*payload.position());
    switch (header) // NOLINT(bugprone-switch-missing-default-case)
    {
        case PACKET_EOF:
            throw ReplicationError(ErrorCodes::CANNOT_READ_ALL_DATA, "Master maybe lost");
        case PACKET_ERR:
        {
            ERRPacket err;
            err.readPayloadWithUnpacked(payload);
            throw ReplicationError::createDeprecated(err.error_message, ErrorCodes::UNKNOWN_EXCEPTION);
        }
        default:
            break;
    }
    /// Skip the generic response packets header flag
    payload.ignore(1);
    read_payload_func(payload);
}

bool BinlogFromSocket::tryReadEvent(BinlogEventPtr & to, UInt64 ms)
{
    ReadPacketFromSocket packet([this](ReadBuffer & payload)
    {
        MySQLBinlogEventReadBuffer event_payload(payload, checksum_signature_length);

        EventHeader event_header;
        event_header.parse(event_payload);

        parseEvent(event_header, event_payload);
    });

    if (packet_endpoint && packet_endpoint->tryReceivePacket(packet, ms))
    {
        to = event;
        return static_cast<bool>(to);
    }

    return false;
}

void BinlogFromFile::open(const String & filename)
{
    in = std::make_unique<ReadBufferFromFile>(filename);
    assertString("\xfe\x62\x69\x6e", *in); /// magic number
}

bool BinlogFromFile::tryReadEvent(BinlogEventPtr & to, UInt64 /*ms*/)
{
    if (in && !in->eof())
    {
        EventHeader event_header;
        event_header.parse(*in);

        LimitReadBuffer limit_read_buffer(*in, event_header.event_size - EVENT_HEADER_LENGTH, /* throw_exception */ false, /* exact_limit */ {});
        MySQLBinlogEventReadBuffer event_payload(limit_read_buffer, checksum_signature_length);
        parseEvent(event_header, event_payload);
        to = event;
        return static_cast<bool>(to);
    }

    return false;
}

BinlogFromFileFactory::BinlogFromFileFactory(const String & filename_)
    : filename(filename_)
{
}

BinlogPtr BinlogFromFileFactory::createBinlog(const String & executed_gtid_set)
{
    auto ret = std::make_shared<BinlogFromFile>();
    ret->open(filename);
    if (!executed_gtid_set.empty())
    {
        /// NOTE: Used for testing only!
        GTIDSets sets;
        sets.parse(executed_gtid_set);
        if (sets.sets.size() != 1 || sets.sets[0].intervals.size() != 1)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Too many intervals: {}", executed_gtid_set);
        BinlogEventPtr event;
        while (ret->tryReadEvent(event, 0))
        {
            const auto & s = ret->getPosition().gtid_sets.sets;
            if (!s.empty() && !s[0].intervals.empty() && s[0].intervals[0].end == sets.sets[0].intervals[0].end)
                break;
        }

        auto pos = ret->getPosition();
        pos.gtid_sets.sets.front().intervals.front().start = sets.sets.front().intervals.front().start;
        ret->setPosition(pos);
    }
    return ret;
}

BinlogFromSocketFactory::BinlogFromSocketFactory(const String & host_, UInt16 port_, const String & user_, const String & password_)
    : host(host_)
    , port(port_)
    , user(user_)
    , password(password_)
{
}

BinlogPtr BinlogFromSocketFactory::createBinlog(const String & executed_gtid_set)
{
    auto ret = std::make_shared<BinlogFromSocket>();
    ret->connect(host, port, user, password);
    ret->start(randomNumber(), executed_gtid_set);
    auto pos = ret->getPosition();
    if (pos.gtid_sets.sets.empty() || pos.gtid_sets.sets.front().intervals.front().start != 1)
        throw Exception(ErrorCodes::NETWORK_ERROR, "Could not create: Wrong executed_gtid_set: {} -> {}", executed_gtid_set, pos.gtid_sets.toString());
    return ret;
}

/// Should be in MySQLReplication namespace
bool operator==(const Position & left, const Position & right)
{
    return left.binlog_name == right.binlog_name &&
        left.binlog_pos == right.binlog_pos &&
        left.gtid_sets == right.gtid_sets;
}

}
}
src/Databases/MySQL/MySQLBinlog.h (new file, 120 lines)

#pragma once

#include <Core/MySQL/MySQLReplication.h>
#include <Core/MySQL/PacketsGeneric.h>
#include <Poco/Net/StreamSocket.h>

namespace DB
{
using namespace MySQLProtocol;
using namespace Generic;

namespace MySQLReplication
{

class IBinlog
{
public:
    virtual ~IBinlog() = default;
    virtual bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) = 0;
    virtual Position getPosition() const = 0;
    enum Checksum : UInt8
    {
        NONE = 0,
        CRC32 = 1
    };
    virtual void setChecksum(Checksum /*checksum*/) { }
    static Checksum checksumFromString(const String & checksum);
};

using BinlogPtr = std::shared_ptr<IBinlog>;

class BinlogParser : public IBinlog
{
public:
    Position getPosition() const override { return position; }
    void setPosition(const Position & position_) { position = position_; }
    void setChecksum(Checksum checksum) override;
    static void updatePosition(const BinlogEventPtr & event, Position & position);
    /// Checks if \a older is older position than \a newer
    static bool isNew(const Position & older, const Position & newer);

protected:
    Position position;
    BinlogEventPtr event;
    std::map<UInt64, std::shared_ptr<TableMapEvent>> table_maps;
    size_t checksum_signature_length = 4;
    MySQLCharsetPtr flavor_charset = std::make_shared<MySQLCharset>();
    void parseEvent(EventHeader & event_header, ReadBuffer & event_payload);
};

class BinlogFromSocket : public BinlogParser
{
public:
    void connect(const String & host, UInt16 port, const String & user, const String & password);
    void start(UInt32 slave_id, const String & executed_gtid_set);
    bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) override;

private:
    void disconnect();
    bool connected = false;
    uint8_t sequence_id = 0;
    const uint32_t client_capabilities = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION;

    std::unique_ptr<ReadBuffer> in;
    std::unique_ptr<WriteBuffer> out;
    std::unique_ptr<Poco::Net::StreamSocket> socket;
    std::optional<Poco::Net::SocketAddress> address;
    std::shared_ptr<MySQLProtocol::PacketEndpoint> packet_endpoint;

    void handshake(const String & user, const String & password);
    void registerSlaveOnMaster(UInt32 slave_id);
    void writeCommand(char command, const String & query);
};

class BinlogFromFile : public BinlogParser
{
public:
    void open(const String & filename);
    bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) override;

private:
    std::unique_ptr<ReadBuffer> in;
};

class IBinlogFactory
{
public:
    virtual ~IBinlogFactory() = default;
    virtual BinlogPtr createBinlog(const String & executed_gtid_set) = 0;
};

using BinlogFactoryPtr = std::shared_ptr<IBinlogFactory>;

class BinlogFromFileFactory : public IBinlogFactory
{
public:
    BinlogFromFileFactory(const String & filename_);
    BinlogPtr createBinlog(const String & executed_gtid_set) override;

private:
    const String filename;
};

class BinlogFromSocketFactory : public IBinlogFactory
{
public:
    BinlogFromSocketFactory(const String & host_, UInt16 port_, const String & user_, const String & password_);
    BinlogPtr createBinlog(const String & executed_gtid_set) override;

private:
    const String host;
    const UInt16 port;
    const String user;
    const String password;
};

bool operator==(const Position & left, const Position & right);

}
}
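For orientation, the interface above can be exercised roughly as follows. This is an illustrative sketch, not code from this commit: it relies only on the declarations shown in this header (BinlogFromFile::open, IBinlog::tryReadEvent, BinlogParser::getPosition), and the file path is a made-up placeholder.

// Illustrative sketch (not part of the commit): replay events from an on-disk
// binlog file through the IBinlog interface declared above.
using namespace DB::MySQLReplication;

BinlogFromFile binlog;
binlog.open("/tmp/mysql-bin.000001"); /// placeholder path; open() checks the binlog magic number

BinlogEventPtr event;
while (binlog.tryReadEvent(event, /* ms */ 0)) /// the timeout argument is unused for file-backed binlogs
{
    /// each parsed event advances the tracked replication position
    Position pos = binlog.getPosition();
    /// ... inspect event->type(), event->header, pos.gtid_sets, etc.
}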
src/Databases/MySQL/MySQLBinlogClient.cpp (new file, 104 lines)

#include "MySQLBinlogClient.h"
#include <Common/logger_useful.h>

namespace DB::ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

namespace DB::MySQLReplication
{

BinlogClient::BinlogClient(const BinlogFactoryPtr & factory_,
                           const String & name,
                           UInt64 max_bytes_in_buffer_,
                           UInt64 max_flush_ms_)
    : factory(factory_)
    , binlog_client_name(name)
    , max_bytes_in_buffer(max_bytes_in_buffer_)
    , max_flush_ms(max_flush_ms_)
    , logger(&Poco::Logger::get("BinlogClient(" + name + ")"))
{
}

BinlogPtr BinlogClient::createBinlog(const String & executed_gtid_set,
                                     const String & name,
                                     const NameSet & mysql_database_names,
                                     size_t max_bytes,
                                     UInt64 max_waiting_ms)
{
    std::lock_guard lock(mutex);
    BinlogPtr ret;
    for (auto it = dispatchers.begin(); it != dispatchers.end();)
    {
        auto & dispatcher = *it;
        if (!ret)
        {
            const auto metadata = dispatcher->getDispatcherMetadata();
            LOG_DEBUG(logger, "({} -> {}): Trying dispatcher: {}, size: {} -> {}:{}.{}",
                      name, executed_gtid_set, metadata.name, metadata.binlogs.size(),
                      metadata.position.binlog_name, metadata.position.gtid_sets.toString(), metadata.position.binlog_pos);
            ret = dispatcher->attach(executed_gtid_set, name, mysql_database_names, max_bytes, max_waiting_ms);
            if (ret)
                LOG_DEBUG(logger, "({} -> {}): Reused dispatcher: {}, size: {} -> {}:{}.{}",
                          name, executed_gtid_set, metadata.name, metadata.binlogs.size(),
                          metadata.position.binlog_name, metadata.position.gtid_sets.toString(), metadata.position.binlog_pos);
        }

        if (dispatcher->cleanupBinlogsAndStop())
        {
            const auto metadata = dispatcher->getDispatcherMetadata();
            LOG_DEBUG(logger, "({} -> {}): Deleting dispatcher: {}, size: {}, total dispatchers: {}",
                      name, executed_gtid_set, metadata.name, metadata.binlogs.size(), dispatchers.size());
            it = dispatchers.erase(it);
            continue;
        }
        ++it;
    }

    if (!ret)
    {
        String dispatcher_name = name + ":" + std::to_string(dispatchers_count++);
        LOG_DEBUG(logger, "({} -> {}): Creating dispatcher: {}, total dispatchers: {}",
                  name, executed_gtid_set, dispatcher_name, dispatchers.size());
        auto dispatcher = std::make_shared<BinlogEventsDispatcher>(dispatcher_name, max_bytes_in_buffer, max_flush_ms);
        if (!binlog_checksum.empty())
            dispatcher->setBinlogChecksum(binlog_checksum);
        for (const auto & it : dispatchers)
            dispatcher->syncTo(it);
        ret = dispatcher->start(factory->createBinlog(executed_gtid_set), name, mysql_database_names, max_bytes, max_waiting_ms);
        if (!ret)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not create binlog: {}", executed_gtid_set);
        dispatchers.push_back(dispatcher);
    }

    return ret;
}

BinlogClient::Metadata BinlogClient::getMetadata() const
{
    std::lock_guard lock(mutex);
    Metadata ret;
    ret.binlog_client_name = binlog_client_name;
    for (const auto & dispatcher : dispatchers)
    {
        auto metadata = dispatcher->getDispatcherMetadata();
        if (!metadata.binlogs.empty())
            ret.dispatchers.push_back(metadata);
    }
    return ret;
}

void BinlogClient::setBinlogChecksum(const String & checksum)
{
    std::lock_guard lock(mutex);
    if (binlog_checksum != checksum)
    {
        LOG_DEBUG(logger, "Setting binlog_checksum: {} -> {}, total dispatchers: {}", binlog_checksum, checksum, dispatchers.size());
        binlog_checksum = checksum;
        for (const auto & dispatcher : dispatchers)
            dispatcher->setBinlogChecksum(checksum);
    }
}

}
src/Databases/MySQL/MySQLBinlogClient.h (new file, 57 lines)

#pragma once

#include <Databases/MySQL/MySQLBinlog.h>
#include <Databases/MySQL/MySQLBinlogEventsDispatcher.h>

namespace DB::MySQLReplication
{

/** It is supposed to reduce the number of connections to remote MySQL binlog by reusing one connection between several consumers.
  * Such reusing of the connection makes the time of reading from the remote binlog independent to number of the consumers.
  * It tracks a list of BinlogEventsDispatcher instances for consumers with different binlog position.
  * The dispatchers with the same binlog position will be merged to one.
  */
class BinlogClient
{
public:
    BinlogClient(const BinlogFactoryPtr & factory,
                 const String & name = {},
                 UInt64 max_bytes_in_buffer_ = DBMS_DEFAULT_BUFFER_SIZE,
                 UInt64 max_flush_ms_ = 1000);
    BinlogClient(const BinlogClient & other) = delete;
    ~BinlogClient() = default;
    BinlogClient & operator=(const BinlogClient & other) = delete;

    /// Creates a binlog to receive events
    BinlogPtr createBinlog(const String & executed_gtid_set = {},
                           const String & name = {},
                           const NameSet & mysql_database_names = {},
                           size_t max_bytes = 0,
                           UInt64 max_waiting_ms = 0);

    /// The binlog checksum is related to entire connection
    void setBinlogChecksum(const String & checksum);

    struct Metadata
    {
        String binlog_client_name;
        std::vector<BinlogEventsDispatcher::DispatcherMetadata> dispatchers;
    };
    /// Returns only not empty dispatchers
    Metadata getMetadata() const;

private:
    BinlogFactoryPtr factory;
    const String binlog_client_name;
    UInt64 max_bytes_in_buffer = 0;
    UInt64 max_flush_ms = 0;
    std::vector<BinlogEventsDispatcherPtr> dispatchers;
    String binlog_checksum;
    mutable std::mutex mutex;
    Poco::Logger * logger = nullptr;
    int dispatchers_count = 0;
};

using BinlogClientPtr = std::shared_ptr<BinlogClient>;

}
src/Databases/MySQL/MySQLBinlogClientFactory.cpp (new file, 46 lines)

#include <Databases/MySQL/MySQLBinlogClientFactory.h>

namespace DB::MySQLReplication
{

BinlogClientFactory & BinlogClientFactory::instance()
{
    static BinlogClientFactory ret;
    return ret;
}

BinlogClientPtr BinlogClientFactory::getClient(const String & host, UInt16 port, const String & user, const String & password, UInt64 max_bytes_in_buffer, UInt64 max_flush_ms)
{
    std::lock_guard lock(mutex);
    String binlog_client_name;
    WriteBufferFromString stream(binlog_client_name);
    stream << user << "@" << host << ":" << port;
    stream.finalize();
    String binlog_client_key = binlog_client_name + ":" + password;
    auto it = clients.find(binlog_client_key);
    BinlogClientPtr ret = it != clients.end() ? it->second.lock() : nullptr;
    if (ret)
        return ret;
    auto factory = std::make_shared<BinlogFromSocketFactory>(host, port, user, password);
    auto client = std::make_shared<BinlogClient>(factory, binlog_client_name, max_bytes_in_buffer, max_flush_ms);
    clients[binlog_client_key] = client;
    return client;
}

std::vector<BinlogClient::Metadata> BinlogClientFactory::getMetadata() const
{
    std::lock_guard lock(mutex);
    std::vector<BinlogClient::Metadata> ret;
    for (const auto & it : clients)
    {
        if (auto c = it.second.lock())
        {
            auto metadata = c->getMetadata();
            if (!metadata.dispatchers.empty())
                ret.push_back(metadata);
        }
    }
    return ret;
}

}
src/Databases/MySQL/MySQLBinlogClientFactory.h (new file, 38 lines)

#pragma once

#include <Databases/MySQL/MySQLBinlogClient.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>

namespace DB::MySQLReplication
{

/** Global instance to create or reuse MySQL Binlog Clients.
  * If a binlog client already exists for specific params,
  * it will be returned and reused to read binlog events from MySQL.
  * Otherwise new instance will be created.
  */
class BinlogClientFactory final : boost::noncopyable
{
public:
    static BinlogClientFactory & instance();

    BinlogClientPtr getClient(const String & host,
                              UInt16 port,
                              const String & user,
                              const String & password,
                              UInt64 max_bytes_in_buffer,
                              UInt64 max_flush_ms);

    /// Returns info of all registered clients
    std::vector<BinlogClient::Metadata> getMetadata() const;

private:
    BinlogClientFactory() = default;

    // Keeps track of already destroyed clients
    std::unordered_map<String, std::weak_ptr<BinlogClient>> clients;
    mutable std::mutex mutex;
};

}
src/Databases/MySQL/MySQLBinlogEventsDispatcher.cpp (new file, 626 lines)
@@ -0,0 +1,626 @@
#include "MySQLBinlogEventsDispatcher.h"
|
||||||
|
#include <boost/algorithm/string/join.hpp>
|
||||||
|
#include <Common/logger_useful.h>
|
||||||
|
|
||||||
|
namespace DB::ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int UNKNOWN_EXCEPTION;
|
||||||
|
extern const int TIMEOUT_EXCEEDED;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace DB::MySQLReplication
|
||||||
|
{
|
||||||
|
|
||||||
|
class BinlogFromDispatcher : public IBinlog
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
BinlogFromDispatcher(const String & name_, const NameSet & mysql_database_names_, size_t max_bytes_, UInt64 max_waiting_ms_)
|
||||||
|
: name(name_)
|
||||||
|
, mysql_database_names(mysql_database_names_)
|
||||||
|
, max_bytes(max_bytes_)
|
||||||
|
, max_waiting_ms(max_waiting_ms_)
|
||||||
|
, logger(&Poco::Logger::get("BinlogFromDispatcher(" + name + ")"))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
~BinlogFromDispatcher() override
|
||||||
|
{
|
||||||
|
stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
void stop()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
if (is_cancelled)
|
||||||
|
return;
|
||||||
|
is_cancelled = true;
|
||||||
|
}
|
||||||
|
cv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string getName() const
|
||||||
|
{
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool tryReadEvent(BinlogEventPtr & to, UInt64 ms) override;
|
||||||
|
Position getPosition() const override;
|
||||||
|
void setPosition(const Position & initial, const Position & wait);
|
||||||
|
void setException(const std::exception_ptr & exception_);
|
||||||
|
void push(const BinlogEventsDispatcher::Buffer & buffer);
|
||||||
|
BinlogEventsDispatcher::BinlogMetadata getBinlogMetadata() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const String name;
|
||||||
|
const NameSet mysql_database_names;
|
||||||
|
const size_t max_bytes = 0;
|
||||||
|
const UInt64 max_waiting_ms = 0;
|
||||||
|
|
||||||
|
Position position;
|
||||||
|
GTIDSets gtid_sets_wait;
|
||||||
|
|
||||||
|
BinlogEventsDispatcher::Buffer buffer;
|
||||||
|
mutable std::mutex mutex;
|
||||||
|
|
||||||
|
std::condition_variable cv;
|
||||||
|
bool is_cancelled = false;
|
||||||
|
Poco::Logger * logger = nullptr;
|
||||||
|
std::exception_ptr exception;
|
||||||
|
};
|
||||||
|
|
||||||
|
static String getBinlogNames(const std::vector<std::weak_ptr<BinlogFromDispatcher>> & binlogs)
|
||||||
|
{
|
||||||
|
std::vector<String> names;
|
||||||
|
for (const auto & it : binlogs)
|
||||||
|
{
|
||||||
|
if (auto binlog = it.lock())
|
||||||
|
names.push_back(binlog->getName());
|
||||||
|
}
|
||||||
|
return boost::algorithm::join(names, ", ");
|
||||||
|
}
|
||||||
|
|
||||||
|
BinlogEventsDispatcher::BinlogEventsDispatcher(const String & logger_name_, size_t max_bytes_in_buffer_, UInt64 max_flush_ms_)
|
||||||
|
: logger_name(logger_name_)
|
||||||
|
, max_bytes_in_buffer(max_bytes_in_buffer_)
|
||||||
|
, max_flush_ms(max_flush_ms_)
|
||||||
|
, logger(&Poco::Logger::get("BinlogEventsDispatcher(" + logger_name + ")"))
|
||||||
|
, dispatching_thread(std::make_unique<ThreadFromGlobalPool>([this]() { dispatchEvents(); }))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
BinlogEventsDispatcher::~BinlogEventsDispatcher()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
is_cancelled = true;
|
||||||
|
auto exc = std::make_exception_ptr(Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Dispatcher {} has been already destroyed", logger_name));
|
||||||
|
try
|
||||||
|
{
|
||||||
|
cleanupLocked([&](const auto & binlog)
|
||||||
|
{
|
||||||
|
/// Notify the binlogs that the dispatcher is already destroyed
|
||||||
|
/// and it needs to recreate new binlogs if needed
|
||||||
|
binlog->setException(exc);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
catch (const std::exception & exc)
|
||||||
|
{
|
||||||
|
LOG_ERROR(logger, "Unexpected exception: {}", exc.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cv.notify_all();
|
||||||
|
if (dispatching_thread)
|
||||||
|
dispatching_thread->join();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void flushTimers(Stopwatch & watch, UInt64 & total_time, UInt64 & size, float & size_per_sec, UInt64 & bytes, float & bytes_per_sec, float threshold_flush, float threshold_reset)
|
||||||
|
{
|
||||||
|
total_time += watch.elapsedMicroseconds();
|
||||||
|
const float elapsed_seconds = total_time * 1e-6f;
|
||||||
|
if (elapsed_seconds >= threshold_flush)
|
||||||
|
{
|
||||||
|
size_per_sec = size / elapsed_seconds;
|
||||||
|
bytes_per_sec = bytes / elapsed_seconds;
|
||||||
|
}
|
||||||
|
if (elapsed_seconds >= threshold_reset)
|
||||||
|
{
|
||||||
|
size = 0;
|
||||||
|
bytes = 0;
|
||||||
|
total_time = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogEventsDispatcher::flushBufferLocked()
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
if (buffer.bytes)
|
||||||
|
cleanupLocked([&](const auto & b) { b->push(buffer); });
|
||||||
|
events_flush += buffer.events.size();
|
||||||
|
bytes_flush += buffer.bytes;
|
||||||
|
flushTimers(watch, events_flush_total_time, events_flush, events_flush_per_sec, bytes_flush, bytes_flush_per_sec, 0.1f, 1.0);
|
||||||
|
buffer = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isDispatcherEventIgnored(const BinlogEventPtr & event)
|
||||||
|
{
|
||||||
|
switch (event->header.type)
|
||||||
|
{
|
||||||
|
/// Sending to all databases:
|
||||||
|
case GTID_EVENT: /// Catch up requested executed gtid set, used only in BinlogFromDispatcher
|
||||||
|
case ROTATE_EVENT: /// Change binlog_checksum
|
||||||
|
case XID_EVENT: /// Commit transaction
|
||||||
|
/// Sending to all attached binlogs without filtering on dispatcher thread
|
||||||
|
/// to keep the connection as up-to-date as possible,
|
||||||
|
/// but these events should be filtered on databases' threads
|
||||||
|
/// and sent only to requested databases:
|
||||||
|
case QUERY_EVENT: /// Apply DDL
|
||||||
|
case WRITE_ROWS_EVENT_V1: /// Apply DML
|
||||||
|
case WRITE_ROWS_EVENT_V2:
|
||||||
|
case DELETE_ROWS_EVENT_V1:
|
||||||
|
case DELETE_ROWS_EVENT_V2:
|
||||||
|
case UPDATE_ROWS_EVENT_V1:
|
||||||
|
case UPDATE_ROWS_EVENT_V2:
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogEventsDispatcher::dispatchEvents()
|
||||||
|
{
|
||||||
|
LOG_TRACE(logger, "{}: started", __FUNCTION__);
|
||||||
|
BinlogEventPtr event;
|
||||||
|
BinlogPtr binlog_;
|
||||||
|
Stopwatch watch;
|
||||||
|
UInt64 events_read = 0;
|
||||||
|
UInt64 bytes_read = 0;
|
||||||
|
UInt64 events_read_total_time = 0;
|
||||||
|
Stopwatch watch_events_read;
|
||||||
|
|
||||||
|
while (!is_cancelled)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::unique_lock lock(mutex);
|
||||||
|
cv.wait(lock, [&] { return is_cancelled || (binlog_read_from && !binlogs.empty()); });
|
||||||
|
if (is_cancelled)
|
||||||
|
break;
|
||||||
|
|
||||||
|
for (auto it = sync_to.begin(); it != sync_to.end() && !binlogs.empty();)
|
||||||
|
{
|
||||||
|
if (auto d = it->lock())
|
||||||
|
{
|
||||||
|
/// If we can catch up the position of a dispatcher we synced to,
|
||||||
|
/// need to move all binlogs out
|
||||||
|
if (trySyncLocked(d))
|
||||||
|
{
|
||||||
|
/// Don't keep connection longer than needed
|
||||||
|
stopLocked();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
it = sync_to.erase(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (binlog_read_from)
|
||||||
|
binlog_read_from->setChecksum(binlog_checksum);
|
||||||
|
binlog_ = binlog_read_from;
|
||||||
|
if (watch.elapsedMilliseconds() >= max_flush_ms || buffer.bytes >= max_bytes_in_buffer)
|
||||||
|
{
|
||||||
|
flushBufferLocked();
|
||||||
|
watch.restart();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
watch_events_read.restart();
|
||||||
|
if (!is_cancelled && binlog_ && binlog_->tryReadEvent(event, max_flush_ms) && event)
|
||||||
|
{
|
||||||
|
++events_read;
|
||||||
|
bytes_read += event->header.event_size;
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
flushTimers(watch_events_read, events_read_total_time, events_read, events_read_per_sec, bytes_read, bytes_read_per_sec, 1.0, 5.0);
|
||||||
|
BinlogParser::updatePosition(event, position);
|
||||||
|
/// Ignore meaningless events
|
||||||
|
if (isDispatcherEventIgnored(event))
|
||||||
|
continue;
|
||||||
|
buffer.events.push_back(event);
|
||||||
|
buffer.bytes += event->header.event_size;
|
||||||
|
buffer.position = position;
|
||||||
|
/// Deliver ROTATE event ASAP if there binlog_checksum should be changed
|
||||||
|
if (event->header.type == ROTATE_EVENT)
|
||||||
|
flushBufferLocked();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (const std::exception & exc)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
LOG_ERROR(logger, "Exception: {}", exc.what());
|
||||||
|
stopLocked();
|
||||||
|
/// All attached binlogs should be recreated
|
||||||
|
cleanupLocked([&](const auto & b) { b->setException(std::current_exception()); });
|
||||||
|
binlogs.clear();
|
||||||
|
buffer = {};
|
||||||
|
position = {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG_TRACE(logger, "{}: finished", __FUNCTION__);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BinlogEventsDispatcher::cleanupLocked(const std::function<void(const std::shared_ptr<BinlogFromDispatcher> & binlog)> & fn)
|
||||||
|
{
|
||||||
|
for (auto it = binlogs.begin(); it != binlogs.end();)
|
||||||
|
{
|
||||||
|
if (auto binlog = it->lock())
|
||||||
|
{
|
||||||
|
if (fn)
|
||||||
|
fn(binlog);
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
it = binlogs.erase(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return binlogs.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BinlogEventsDispatcher::cleanupBinlogsAndStop()
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
const bool is_empty = cleanupLocked();
|
||||||
|
if (is_empty && binlog_read_from)
|
||||||
|
stopLocked();
|
||||||
|
return is_empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogEventsDispatcher::stopLocked()
|
||||||
|
{
|
||||||
|
if (!binlog_read_from)
|
||||||
|
{
|
||||||
|
LOG_DEBUG(logger, "Could not stop. Already stopped");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanupLocked();
|
||||||
|
binlog_read_from = nullptr;
|
||||||
|
LOG_DEBUG(logger, "Stopped: {}:{}.{}: ({})", position.binlog_name, position.gtid_sets.toString(), position.binlog_pos, getBinlogNames(binlogs));
|
||||||
|
}
|
||||||
|
|
||||||
|
BinlogPtr BinlogEventsDispatcher::createBinlogLocked(const String & name_,
|
||||||
|
const NameSet & mysql_database_names,
|
||||||
|
size_t max_bytes,
|
||||||
|
UInt64 max_waiting_ms,
|
||||||
|
const Position & pos_initial,
|
||||||
|
const Position & pos_wait)
|
||||||
|
{
|
||||||
|
static int client_cnt = 0;
|
||||||
|
const String client_id = !name_.empty() ? name_ : "binlog_" + std::to_string(++client_cnt);
|
||||||
|
auto binlog = std::make_shared<BinlogFromDispatcher>(client_id, mysql_database_names, max_bytes, max_waiting_ms);
|
||||||
|
binlogs.push_back(binlog);
|
||||||
|
binlog->setPosition(pos_initial, pos_wait);
|
||||||
|
LOG_DEBUG(logger, "Created binlog: {} -> {}", name_, binlog->getPosition().gtid_sets.toString());
|
||||||
|
return binlog;
|
||||||
|
}
|
||||||
|
|
||||||
|
BinlogPtr BinlogEventsDispatcher::start(const BinlogPtr & binlog_read_from_,
|
||||||
|
const String & name_,
|
||||||
|
const NameSet & mysql_database_names,
|
||||||
|
size_t max_bytes,
|
||||||
|
UInt64 max_waiting_ms)
|
||||||
|
{
|
||||||
|
BinlogPtr ret;
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
if (is_started)
|
||||||
|
return {};
|
||||||
|
binlog_read_from = binlog_read_from_;
|
||||||
|
/// It is used for catching up
|
||||||
|
/// binlog_read_from should return position with requested executed GTID set: 1-N
|
||||||
|
position = binlog_read_from->getPosition();
|
||||||
|
ret = createBinlogLocked(name_, mysql_database_names, max_bytes, max_waiting_ms, position);
|
||||||
|
is_started = true;
|
||||||
|
}
|
||||||
|
cv.notify_all();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
BinlogPtr BinlogEventsDispatcher::attach(const String & executed_gtid_set,
|
||||||
|
const String & name_,
|
||||||
|
const NameSet & mysql_database_names,
|
||||||
|
size_t max_bytes,
|
||||||
|
UInt64 max_waiting_ms)
|
||||||
|
{
|
||||||
|
BinlogPtr ret;
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
/// Check if binlog_read_from can be reused:
|
||||||
|
/// Attach to only active dispatchers
|
||||||
|
/// and if executed_gtid_set is higher value than current
|
||||||
|
if (!binlog_read_from || !is_started || cleanupLocked() || executed_gtid_set.empty())
|
||||||
|
return {};
|
||||||
|
Position pos_wait;
|
||||||
|
pos_wait.gtid_sets.parse(executed_gtid_set);
|
||||||
|
if (!BinlogParser::isNew(position, pos_wait))
|
||||||
|
return {};
|
||||||
|
ret = createBinlogLocked(name_, mysql_database_names, max_bytes, max_waiting_ms, position, pos_wait);
|
||||||
|
}
|
||||||
|
cv.notify_all();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogEventsDispatcher::syncToLocked(const BinlogEventsDispatcherPtr & to)
|
||||||
|
{
|
||||||
|
if (to && this != to.get())
|
||||||
|
{
|
||||||
|
std::vector<String> names;
|
||||||
|
for (const auto & it : sync_to)
|
||||||
|
{
|
||||||
|
if (auto dispatcher = it.lock())
|
||||||
|
names.push_back(dispatcher->logger_name);
|
||||||
|
}
|
||||||
|
LOG_DEBUG(logger, "Syncing -> ({}) + ({})", boost::algorithm::join(names, ", "), to->logger_name);
|
||||||
|
sync_to.emplace_back(to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogEventsDispatcher::syncTo(const BinlogEventsDispatcherPtr & to)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
syncToLocked(to);
|
||||||
|
}
|
||||||
|
|
||||||
|
Position BinlogEventsDispatcher::getPosition() const
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BinlogEventsDispatcher::trySyncLocked(BinlogEventsDispatcherPtr & to)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::lock_guard lock(to->mutex);
|
||||||
|
/// Don't catch up if positions do not have GTIDs yet
|
||||||
|
const auto & cur_sets = position.gtid_sets.sets;
|
||||||
|
const auto & sets = to->position.gtid_sets.sets;
|
||||||
|
/// Sync to only started dispatchers
|
||||||
|
if (!to->binlog_read_from || (cur_sets.empty() && sets.empty()) || to->position != position)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
flushBufferLocked();
|
||||||
|
to->flushBufferLocked();
|
||||||
|
LOG_DEBUG(logger, "Synced up: {} -> {}: {}:{}.{}: ({}) + ({})", logger_name, to->logger_name,
|
||||||
|
position.binlog_name, position.gtid_sets.toString(), position.binlog_pos, getBinlogNames(to->binlogs), getBinlogNames(binlogs));
|
||||||
|
std::move(binlogs.begin(), binlogs.end(), std::back_inserter(to->binlogs));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Notify that new binlogs arrived
|
||||||
|
to->cv.notify_all();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogEventsDispatcher::setBinlogChecksum(const String & checksum)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
LOG_DEBUG(logger, "Setting binlog_checksum: {}", checksum);
|
||||||
|
binlog_checksum = IBinlog::checksumFromString(checksum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogFromDispatcher::push(const BinlogEventsDispatcher::Buffer & buffer_)
|
||||||
|
{
|
||||||
|
std::unique_lock lock(mutex);
|
||||||
|
cv.wait_for(lock, std::chrono::milliseconds(max_waiting_ms),
|
||||||
|
[&]
|
||||||
|
{
|
||||||
|
bool ret = is_cancelled || exception || max_bytes == 0 || buffer.bytes < max_bytes;
|
||||||
|
if (!ret)
|
||||||
|
LOG_TRACE(logger, "Waiting: bytes: {} >= {}", buffer.bytes, max_bytes);
|
||||||
|
return ret;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (is_cancelled || exception)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (max_bytes != 0 && buffer.bytes >= max_bytes)
|
||||||
|
{
|
||||||
|
lock.unlock();
|
||||||
|
setException(std::make_exception_ptr(
|
||||||
|
Exception(ErrorCodes::TIMEOUT_EXCEEDED,
|
||||||
|
"Timeout exceeded: Waiting: bytes: {} >= {}", buffer.bytes, max_bytes)));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto it = buffer_.events.begin();
|
||||||
|
size_t bytes = buffer_.bytes;
|
||||||
|
if (!gtid_sets_wait.sets.empty())
|
||||||
|
{
|
||||||
|
if (!buffer_.position.gtid_sets.contains(gtid_sets_wait))
|
||||||
|
{
|
||||||
|
LOG_TRACE(logger, "(wait_until: {} / {}) Skipped bytes: {}",
|
||||||
|
gtid_sets_wait.toString(), buffer_.position.gtid_sets.toString(), buffer_.bytes);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<GTID> seqs;
|
||||||
|
for (auto & s : gtid_sets_wait.sets)
|
||||||
|
{
|
||||||
|
GTID g;
|
||||||
|
g.uuid = s.uuid;
|
||||||
|
for (auto & in : s.intervals)
|
||||||
|
{
|
||||||
|
g.seq_no = in.end;
|
||||||
|
seqs.push_back(g);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (; it != buffer_.events.end(); ++it)
|
||||||
|
{
|
||||||
|
const auto & event = *it;
|
||||||
|
auto find_if_func = [&](auto & a)
|
||||||
|
{
|
||||||
|
return std::static_pointer_cast<GTIDEvent>(event)->gtid == a;
|
||||||
|
};
|
||||||
|
if (event->header.type != GTID_EVENT || std::find_if(seqs.begin(), seqs.end(), find_if_func) == seqs.end())
|
||||||
|
{
|
||||||
|
LOG_TRACE(logger, "(wait_until: {} / {}) Skipped {}",
|
||||||
|
gtid_sets_wait.toString(), buffer_.position.gtid_sets.toString(), magic_enum::enum_name(event->header.type));
|
||||||
|
bytes -= event->header.event_size;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
LOG_DEBUG(logger, "(wait_until: {} / {}) Starting {}: gtid seq_no: {}",
|
||||||
|
gtid_sets_wait.toString(), buffer_.position.gtid_sets.toString(), magic_enum::enum_name(event->header.type),
|
||||||
|
std::static_pointer_cast<GTIDEvent>(event)->gtid.seq_no);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
gtid_sets_wait = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (it != buffer_.events.end())
|
||||||
|
{
|
||||||
|
std::copy(it, buffer_.events.end(), std::back_inserter(buffer.events));
|
||||||
|
buffer.bytes += bytes;
|
||||||
|
buffer.position = buffer_.position;
|
||||||
|
}
|
||||||
|
lock.unlock();
|
||||||
|
/// Notify that added some event
|
||||||
|
cv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rethrowIfNeeded(const std::exception_ptr & exception, size_t events_size)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
std::rethrow_exception(exception);
|
||||||
|
}
|
||||||
|
catch (const Exception & e)
|
||||||
|
{
|
||||||
|
/// If timeout exceeded, it is safe to read all events before rethrowning
|
||||||
|
if (e.code() == ErrorCodes::TIMEOUT_EXCEEDED && events_size > 0)
|
||||||
|
return;
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isBinlogEventIgnored(const NameSet & mysql_database_names, const BinlogEventPtr & event)
|
||||||
|
{
|
||||||
|
bool ret = false;
|
||||||
|
switch (event->header.type)
|
||||||
|
{
|
||||||
|
case WRITE_ROWS_EVENT_V1:
|
||||||
|
case WRITE_ROWS_EVENT_V2:
|
||||||
|
case DELETE_ROWS_EVENT_V1:
|
||||||
|
case DELETE_ROWS_EVENT_V2:
|
||||||
|
case UPDATE_ROWS_EVENT_V1:
|
||||||
|
case UPDATE_ROWS_EVENT_V2:
|
||||||
|
ret = !mysql_database_names.empty() && !mysql_database_names.contains(std::static_pointer_cast<RowsEvent>(event)->schema);
|
||||||
|
break;
|
||||||
|
case QUERY_EVENT:
|
||||||
|
if (event->type() != MYSQL_UNHANDLED_EVENT)
|
||||||
|
{
|
||||||
|
auto query_event = std::static_pointer_cast<QueryEvent>(event);
|
||||||
|
ret = !mysql_database_names.empty() &&
|
||||||
|
!query_event->query_database_name.empty() &&
|
||||||
|
!mysql_database_names.contains(query_event->query_database_name);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BinlogFromDispatcher::tryReadEvent(BinlogEventPtr & to, UInt64 ms)
|
||||||
|
{
|
||||||
|
auto wake_up_func = [&]
|
||||||
|
{
|
||||||
|
if (exception)
|
||||||
|
rethrowIfNeeded(exception, buffer.events.size());
|
||||||
|
return is_cancelled || !buffer.events.empty();
|
||||||
|
};
|
||||||
|
to = nullptr;
|
||||||
|
std::unique_lock lock(mutex);
|
||||||
|
if (!cv.wait_for(lock, std::chrono::milliseconds(ms), wake_up_func) || is_cancelled || buffer.events.empty())
|
||||||
|
return false;
|
||||||
|
to = buffer.events.front();
|
||||||
|
buffer.events.pop_front();
|
||||||
|
BinlogParser::updatePosition(to, position);
|
||||||
|
buffer.bytes -= to->header.event_size;
|
||||||
|
if (isBinlogEventIgnored(mysql_database_names, to))
|
||||||
|
to = std::make_shared<DryRunEvent>(EventHeader(to->header));
|
||||||
|
lock.unlock();
|
||||||
|
/// Notify that removed some event
|
||||||
|
cv.notify_all();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Position BinlogFromDispatcher::getPosition() const
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogFromDispatcher::setPosition(const Position & initial, const Position & wait)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
if (wait.gtid_sets.sets.empty())
|
||||||
|
{
|
||||||
|
position = initial;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
position = wait;
|
||||||
|
gtid_sets_wait = wait.gtid_sets;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BinlogFromDispatcher::setException(const std::exception_ptr & exception_)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
exception = exception_;
|
||||||
|
}
|
||||||
|
cv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
BinlogEventsDispatcher::BinlogMetadata BinlogFromDispatcher::getBinlogMetadata() const
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
BinlogEventsDispatcher::BinlogMetadata ret;
|
||||||
|
ret.name = name;
|
||||||
|
ret.position_write = buffer.position;
|
||||||
|
ret.position_read = position;
|
||||||
|
ret.size = buffer.events.size();
|
||||||
|
ret.bytes = buffer.bytes;
|
||||||
|
ret.max_bytes = max_bytes;
|
||||||
|
ret.max_waiting_ms = max_waiting_ms;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
BinlogEventsDispatcher::DispatcherMetadata BinlogEventsDispatcher::getDispatcherMetadata() const
|
||||||
|
{
|
||||||
|
std::lock_guard lock(mutex);
|
||||||
|
DispatcherMetadata ret;
|
||||||
|
ret.name = logger_name;
|
||||||
|
ret.position = position;
|
||||||
|
ret.events_read_per_sec = events_read_per_sec;
|
||||||
|
ret.bytes_read_per_sec = bytes_read_per_sec;
|
||||||
|
ret.events_flush_per_sec = events_flush_per_sec;
|
||||||
|
ret.bytes_flush_per_sec = bytes_flush_per_sec;
|
||||||
|
|
||||||
|
for (const auto & it : binlogs)
|
||||||
|
{
|
||||||
|
if (auto binlog = it.lock())
|
||||||
|
ret.binlogs.push_back(binlog->getBinlogMetadata());
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
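As an aside on the dispatcher implementation above: events for databases outside mysql_database_names are not dropped but delivered as DryRunEvent placeholders, so a consumer's position still advances. A rough sketch of the consumer loop (illustrative only; the binlog comes from BinlogEventsDispatcher::start()/attach() with mysql_database_names = {"db1"}):

#include <Databases/MySQL/MySQLBinlog.h>

void drainBinlog(const DB::MySQLReplication::BinlogPtr & binlog, DB::MySQLReplication::Position & position)
{
    DB::MySQLReplication::BinlogEventPtr event;
    while (binlog->tryReadEvent(event, /* timeout ms */ 1000))
    {
        /// Events of other schemas arrive as DryRunEvent placeholders here,
        /// so the GTID position keeps advancing without materializing foreign rows.
        DB::MySQLReplication::BinlogParser::updatePosition(event, position);
    }
}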
src/Databases/MySQL/MySQLBinlogEventsDispatcher.h (new file, 136 lines)
@@ -0,0 +1,136 @@
#pragma once

#include <Databases/MySQL/MySQLBinlog.h>
#include <Common/ThreadPool.h>
#include <Poco/Logger.h>
#include <base/unit.h>

namespace DB::MySQLReplication
{

class BinlogEventsDispatcher;
using BinlogEventsDispatcherPtr = std::shared_ptr<BinlogEventsDispatcher>;
class BinlogFromDispatcher;

/** Reads the binlog events from one source and dispatches them over consumers.
  * If it can catch up the position of the another dispatcher, it will move all consumers to this dispatcher.
  */
class BinlogEventsDispatcher final : boost::noncopyable
{
public:
    BinlogEventsDispatcher(const String & logger_name_ = "BinlogDispatcher", size_t max_bytes_in_buffer_ = 1_MiB, UInt64 max_flush_ms_ = 1000);
    ~BinlogEventsDispatcher();

    /// Moves all IBinlog objects to \a to if it has the same position
    /// Supports syncing to multiple dispatchers
    void syncTo(const BinlogEventsDispatcherPtr & to);

    /** Creates a binlog and starts the dispatching
      * binlog_read_from      Source binlog to read events from
      * name                  Identifies the binlog, could be not unique
      * mysql_database_names  Returns events only from these databases
      * max_bytes             Defines a limit in bytes for this binlog
      *                       Note: Dispatching will be stopped for all binlogs if bytes in queue increases this limit
      * max_waiting_ms        Max wait time when max_bytes exceeded
      */
    BinlogPtr start(const BinlogPtr & binlog_read_from_,
                    const String & name = {},
                    const NameSet & mysql_database_names = {},
                    size_t max_bytes = 0,
                    UInt64 max_waiting_ms = 0);

    /** Creates a binlog if the dispatcher is started
      * executed_gtid_set     Can be higher value than current,
      *                       otherwise not possible to attach
      * name                  Identifies the binlog, could be not unique
      * mysql_database_names  Returns events only from these databases
      * max_bytes             Defines a limit in bytes for this binlog
      * max_waiting_ms        Max wait time when max_bytes exceeded
      */
    BinlogPtr attach(const String & executed_gtid_set,
                     const String & name = {},
                     const NameSet & mysql_database_names = {},
                     size_t max_bytes = 0,
                     UInt64 max_waiting_ms = 0);

    /// Cleans the destroyed binlogs up and returns true if empty
    bool cleanupBinlogsAndStop();

    /// Changes binlog_checksum for binlog_read_from
    void setBinlogChecksum(const String & checksum);

    Position getPosition() const;

    struct BinlogMetadata
    {
        String name;
        /// Position that was written to
        Position position_write;
        /// Position that was read from
        Position position_read;
        size_t size = 0;
        size_t bytes = 0;
        size_t max_bytes = 0;
        UInt64 max_waiting_ms = 0;
    };
    struct DispatcherMetadata
    {
        String name;
        Position position;
        float events_read_per_sec = 0;
        float bytes_read_per_sec = 0;
        float events_flush_per_sec = 0;
        float bytes_flush_per_sec = 0;
        std::vector<BinlogMetadata> binlogs;
    };
    DispatcherMetadata getDispatcherMetadata() const;

    struct Buffer
    {
        std::deque<BinlogEventPtr> events;
        size_t bytes = 0;
        Position position;
    };

private:
    bool cleanupLocked(const std::function<void(const std::shared_ptr<BinlogFromDispatcher> & binlog)> & fn = {});
    bool startLocked(const String & executed_gtid_set);
    void stopLocked();
    BinlogPtr createBinlogLocked(const String & name = {},
                                 const NameSet & mysql_database_names = {},
                                 size_t max_bytes = 0,
                                 UInt64 max_waiting_ms = 0,
                                 const Position & pos_initial = {},
                                 const Position & pos_wait = {});
    void syncToLocked(const BinlogEventsDispatcherPtr & to);
    bool trySyncLocked(BinlogEventsDispatcherPtr & to);
    void flushBufferLocked();
    void dispatchEvents();

    const String logger_name;
    const size_t max_bytes_in_buffer = 0;
    const UInt64 max_flush_ms = 0;
    Poco::Logger * logger = nullptr;

    BinlogPtr binlog_read_from;

    Position position;
    std::vector<std::weak_ptr<BinlogEventsDispatcher>> sync_to;
    std::vector<std::weak_ptr<BinlogFromDispatcher>> binlogs;
    std::atomic_bool is_cancelled{false};
    mutable std::mutex mutex;
    std::condition_variable cv;
    std::unique_ptr<ThreadFromGlobalPool> dispatching_thread;
    IBinlog::Checksum binlog_checksum = IBinlog::CRC32;
    bool is_started = false;
    Buffer buffer;
    float events_read_per_sec = 0;
    float bytes_read_per_sec = 0;
    UInt64 events_flush = 0;
    UInt64 events_flush_total_time = 0;
    float events_flush_per_sec = 0;
    UInt64 bytes_flush = 0;
    float bytes_flush_per_sec = 0;
};

}
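A hedged usage sketch of the header above (not from the commit; the source binlogs and the GTID set are placeholders): one dispatcher serves the first database, a second database either attaches to the same connection or starts its own dispatcher and is merged back via syncTo() once it catches up.

#include <Databases/MySQL/MySQLBinlogEventsDispatcher.h>

void exampleDispatchers(const DB::MySQLReplication::BinlogPtr & socket_binlog_db1,
                        const DB::MySQLReplication::BinlogPtr & socket_binlog_db2)
{
    using namespace DB::MySQLReplication;

    /// First database starts the dispatching from its connection.
    auto d1 = std::make_shared<BinlogEventsDispatcher>("d1");
    BinlogPtr b1 = d1->start(socket_binlog_db1, "db1", NameSet{"db1"});

    /// Second database tries to reuse the same connection; the GTID set here is a placeholder.
    BinlogPtr b2 = d1->attach("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-100", "db2", NameSet{"db2"});
    if (!b2)
    {
        /// Too far behind: read from a dedicated connection and merge into d1 once positions match.
        auto d2 = std::make_shared<BinlogEventsDispatcher>("d2");
        b2 = d2->start(socket_binlog_db2, "db2", NameSet{"db2"});
        d2->syncTo(d1);
        /// In real use d2 stays owned by the database until cleanupBinlogsAndStop() reports it empty.
    }
}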
src/Databases/MySQL/tests/data/binlog.000016 (new binary file, not shown)
src/Databases/MySQL/tests/data/binlog.001390 (new binary file, not shown)
src/Databases/MySQL/tests/gtest_mysql_binlog.cpp (new file, 1754 lines; diff suppressed because it is too large)
@@ -258,12 +258,6 @@ String DiskObjectStorage::getUniqueId(const String & path) const
 
 bool DiskObjectStorage::checkUniqueId(const String & id) const
 {
-    if (!id.starts_with(object_key_prefix))
-    {
-        LOG_DEBUG(log, "Blob with id {} doesn't start with blob storage prefix {}, Stack {}", id, object_key_prefix, StackTrace().toString());
-        return false;
-    }
-
     auto object = StoredObject(id);
     return object_storage->exists(object);
 }
@@ -20,6 +20,7 @@ namespace ErrorCodes
 void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
 {
     readIntText(version, buf);
+    assertChar('\n', buf);
 
     if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_FULL_OBJECT_KEY)
         throw Exception(
@@ -27,8 +28,6 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
             "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}",
             metadata_file_path, toString(version), toString(VERSION_FULL_OBJECT_KEY));
 
-    assertChar('\n', buf);
-
     UInt32 keys_count;
     readIntText(keys_count, buf);
     assertChar('\t', buf);
@@ -122,6 +121,7 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const
 
     chassert(write_version >= VERSION_ABSOLUTE_PATHS && write_version <= VERSION_FULL_OBJECT_KEY);
     writeIntText(write_version, buf);
+
     writeChar('\n', buf);
 
     writeIntText(keys_with_meta.size(), buf);
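Net effect of the deserialize() hunks above, sketched with the same calls (illustrative, not part of the commit): the newline is now consumed immediately after the version number, before the range check, rather than after the error path.

/// Reader sketch; `version` is the same member read in the hunks above (type as in the class).
readIntText(version, buf);
assertChar('\n', buf);   /// moved: consumed right after the version, before validation
/// ... version range check happens here ...
UInt32 keys_count;
readIntText(keys_count, buf);
assertChar('\t', buf);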
@@ -19,7 +19,6 @@
 
 #include <Disks/ObjectStorages/S3/diskSettings.h>
 
-#include <Common/getRandomASCIIString.h>
 #include <Common/ProfileEvents.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/logger_useful.h>
@@ -556,27 +555,12 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
     return std::make_unique<S3ObjectStorage>(
         std::move(new_client), std::move(new_s3_settings),
         version_id, s3_capabilities, new_namespace,
-        endpoint, object_key_prefix, disk_name);
+        endpoint, key_generator, disk_name);
 }
 
-ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string &) const
+ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const
 {
-    /// Path to store the new S3 object.
-
-    /// Total length is 32 a-z characters for enough randomness.
-    /// First 3 characters are used as a prefix for
-    /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/
-
-    constexpr size_t key_name_total_size = 32;
-    constexpr size_t key_name_prefix_size = 3;
-
-    /// Path to store new S3 object.
-    String key = fmt::format("{}/{}",
-        getRandomASCIIString(key_name_prefix_size),
-        getRandomASCIIString(key_name_total_size - key_name_prefix_size));
-
-    /// what ever key_prefix value is, consider that key as relative
-    return ObjectStorageKey::createAsRelative(object_key_prefix, key);
+    return key_generator->generate(path);
 }
 
 
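For reference, the removed body corresponds to a prefix-plus-random-suffix scheme. A standalone sketch of that behaviour (illustrative only; include paths approximate, and it assumes createObjectStorageKeysGeneratorByPrefix preserves the old layout):

#include <Common/ObjectStorageKey.h>
#include <Common/getRandomASCIIString.h>
#include <fmt/format.h>

/// Sketch of the pre-existing key layout: <object_key_prefix>/<3 random chars>/<29 random chars>.
DB::ObjectStorageKey generateKeyLikeBefore(const std::string & object_key_prefix)
{
    constexpr size_t key_name_total_size = 32;
    constexpr size_t key_name_prefix_size = 3;
    auto key = fmt::format("{}/{}",
        DB::getRandomASCIIString(key_name_prefix_size),
        DB::getRandomASCIIString(key_name_total_size - key_name_prefix_size));
    return DB::ObjectStorageKey::createAsRelative(object_key_prefix, key);
}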
@@ -9,6 +9,7 @@
 #include <memory>
 #include <Storages/StorageS3Settings.h>
 #include <Common/MultiVersion.h>
+#include <Common/ObjectStorageKeyGenerator.h>
 
 
 namespace DB
@@ -39,7 +40,6 @@ struct S3ObjectStorageSettings
     bool read_only;
 };
 
-
 class S3ObjectStorage : public IObjectStorage
 {
 private:
@@ -53,10 +53,10 @@ private:
         const S3Capabilities & s3_capabilities_,
         String bucket_,
         String connection_string,
-        String object_key_prefix_,
+        ObjectStorageKeysGeneratorPtr key_generator_,
         const String & disk_name_)
         : bucket(std::move(bucket_))
-        , object_key_prefix(std::move(object_key_prefix_))
+        , key_generator(std::move(key_generator_))
         , disk_name(disk_name_)
         , client(std::move(client_))
         , s3_settings(std::move(s3_settings_))
@@ -179,7 +179,7 @@ private:
 
 private:
     std::string bucket;
-    String object_key_prefix;
+    ObjectStorageKeysGeneratorPtr key_generator;
     std::string disk_name;
 
     MultiVersion<S3::Client> client;
@@ -199,11 +199,6 @@ private:
 class S3PlainObjectStorage : public S3ObjectStorage
 {
 public:
-    ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override
-    {
-        return ObjectStorageKey::createAsRelative(object_key_prefix, path);
-    }
-
     std::string getName() const override { return "S3PlainObjectStorage"; }
 
     template <class ...Args>
@@ -91,6 +91,60 @@ private:
     }
 };
 
+std::pair<String, ObjectStorageKeysGeneratorPtr> getPrefixAndKeyGenerator(
+    String type, const S3::URI & uri, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
+{
+    if (type == "s3_plain")
+        return {uri.key, createObjectStorageKeysGeneratorAsIsWithPrefix(uri.key)};
+
+    chassert(type == "s3");
+
+    bool storage_metadata_write_full_object_key = DiskObjectStorageMetadata::getWriteFullObjectKeySetting();
+    bool send_metadata = config.getBool(config_prefix + ".send_metadata", false);
+
+    if (send_metadata && storage_metadata_write_full_object_key)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Wrong configuration in {}. "
+                        "s3 does not supports feature 'send_metadata' with feature 'storage_metadata_write_full_object_key'.",
+                        config_prefix);
+
+    String object_key_compatibility_prefix = config.getString(config_prefix + ".key_compatibility_prefix", String());
+    String object_key_template = config.getString(config_prefix + ".key_template", String());
+
+    if (object_key_template.empty())
+    {
+        if (!object_key_compatibility_prefix.empty())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                            "Wrong configuration in {}. "
+                            "Setting 'key_compatibility_prefix' can be defined only with setting 'key_template'.",
+                            config_prefix);
+
+        return {uri.key, createObjectStorageKeysGeneratorByPrefix(uri.key)};
+    }
+
+    if (send_metadata)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Wrong configuration in {}. "
+                        "s3 does not supports send_metadata with setting 'key_template'.",
+                        config_prefix);
+
+    if (!storage_metadata_write_full_object_key)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Wrong configuration in {}. "
+                        "Feature 'storage_metadata_write_full_object_key' has to be enabled in order to use setting 'key_template'.",
+                        config_prefix);
+
+    if (!uri.key.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Wrong configuration in {}. "
+                        "URI.key is forbidden with settings 'key_template', use setting 'key_compatibility_prefix' instead'. "
+                        "URI.key: '{}', bucket: '{}'. ",
+                        config_prefix,
+                        uri.key, uri.bucket);
+
+    return {object_key_compatibility_prefix, createObjectStorageKeysGeneratorByTemplate(object_key_template)};
+}
+
 }
 
 void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
@@ -104,7 +158,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
     {
         String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
         S3::URI uri(endpoint);
-        if (!uri.key.ends_with('/'))
+        // an empty key remains empty
+        if (!uri.key.empty() && !uri.key.ends_with('/'))
             uri.key.push_back('/');
 
         S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
@@ -113,6 +168,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
         String type = config.getString(config_prefix + ".type");
         chassert(type == "s3" || type == "s3_plain");
 
+        auto [object_key_compatibility_prefix, object_key_generator] = getPrefixAndKeyGenerator(type, uri, config, config_prefix);
+
         MetadataStoragePtr metadata_storage;
         auto settings = getSettings(config, config_prefix, context);
         auto client = getClient(config, config_prefix, context, *settings);
@@ -128,20 +185,18 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
                 throw Exception(ErrorCodes::BAD_ARGUMENTS, "s3_plain does not supports send_metadata");
 
             s3_storage = std::make_shared<S3PlainObjectStorage>(
-                std::move(client), std::move(settings),
-                uri.version_id, s3_capabilities,
-                uri.bucket, uri.endpoint, uri.key, name);
-            metadata_storage = std::make_shared<MetadataStorageFromPlainObjectStorage>(s3_storage, uri.key);
+                std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, object_key_generator, name);
+
+            metadata_storage = std::make_shared<MetadataStorageFromPlainObjectStorage>(s3_storage, object_key_compatibility_prefix);
         }
         else
         {
             s3_storage = std::make_shared<S3ObjectStorage>(
-                std::move(client), std::move(settings),
-                uri.version_id, s3_capabilities,
-                uri.bucket, uri.endpoint, uri.key, name);
+                std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, object_key_generator, name);
 
             auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context);
-            metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, uri.key);
+
+            metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, object_key_compatibility_prefix);
         }
 
         /// NOTE: should we still perform this check for clickhouse-disks?
@@ -164,7 +219,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
 
     DiskObjectStoragePtr s3disk = std::make_shared<DiskObjectStorage>(
         name,
-        uri.key,
+        uri.key, /// might be empty
         type == "s3" ? "DiskS3" : "DiskS3Plain",
         std::move(metadata_storage),
        std::move(s3_storage),
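A simplified, illustrative decision helper (not the real function, which is the file-local getPrefixAndKeyGenerator above) summarising which generator each disk configuration maps to; the enum, names and error message are placeholders:

#include <stdexcept>
#include <string>

enum class KeyGen { AsIsWithPrefix, ByPrefix, ByTemplate };

KeyGen pickGenerator(const std::string & type, bool has_key_template, bool write_full_object_key)
{
    if (type == "s3_plain")
        return KeyGen::AsIsWithPrefix;   /// object key == uri.key + logical path
    if (!has_key_template)
        return KeyGen::ByPrefix;         /// object key == uri.key + 32 random characters
    if (!write_full_object_key)
        throw std::invalid_argument("key_template needs storage_metadata_write_full_object_key");
    return KeyGen::ByTemplate;           /// object key expanded from <key_template>
}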
@@ -8,6 +8,8 @@
 #include <Common/UTF8Helpers.h>
 #include <Common/iota.h>
 
+#include <numeric>
+
 #ifdef __SSE4_2__
 #    include <nmmintrin.h>
 #endif
@@ -25,7 +27,7 @@ struct FunctionStringDistanceImpl
 {
     using ResultType = typename Op::ResultType;
 
-    static void constantConstant(const std::string & haystack, const std::string & needle, ResultType & res)
+    static void constantConstant(const String & haystack, const String & needle, ResultType & res)
     {
         res = Op::process(haystack.data(), haystack.size(), needle.data(), needle.size());
     }
@@ -51,7 +53,7 @@ struct FunctionStringDistanceImpl
     }
 
     static void constantVector(
-        const std::string & haystack,
+        const String & haystack,
         const ColumnString::Chars & needle_data,
         const ColumnString::Offsets & needle_offsets,
         PaddedPODArray<ResultType> & res)
@@ -70,7 +72,7 @@ struct FunctionStringDistanceImpl
     static void vectorConstant(
         const ColumnString::Chars & data,
         const ColumnString::Offsets & offsets,
-        const std::string & needle,
+        const String & needle,
         PaddedPODArray<ResultType> & res)
     {
         constantVector(needle, data, offsets, res);
@@ -81,7 +83,7 @@ struct FunctionStringDistanceImpl
 struct ByteHammingDistanceImpl
 {
     using ResultType = UInt64;
-    static ResultType inline process(
+    static ResultType process(
         const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size)
     {
         UInt64 res = 0;
@@ -115,7 +117,7 @@ template <bool is_utf8>
 struct ByteJaccardIndexImpl
 {
     using ResultType = Float64;
-    static ResultType inline process(
+    static ResultType process(
         const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size)
     {
         if (haystack_size == 0 || needle_size == 0)
@@ -222,23 +224,23 @@ struct ByteJaccardIndexImpl
     }
 };
 
+static constexpr size_t max_string_size = 1u << 16;
+
 struct ByteEditDistanceImpl
 {
     using ResultType = UInt64;
-    static constexpr size_t max_string_size = 1u << 16;
-
-    static ResultType inline process(
+    static ResultType process(
         const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size)
     {
         if (haystack_size == 0 || needle_size == 0)
             return haystack_size + needle_size;
 
-        /// Safety threshold against DoS, since we use two array to calculate the distance.
+        /// Safety threshold against DoS, since we use two arrays to calculate the distance.
         if (haystack_size > max_string_size || needle_size > max_string_size)
             throw Exception(
                 ErrorCodes::TOO_LARGE_STRING_SIZE,
-                "The string size is too big for function editDistance, "
-                "should be at most {}", max_string_size);
+                "The string size is too big for function editDistance, should be at most {}", max_string_size);
 
         PaddedPODArray<ResultType> distances0(haystack_size + 1, 0);
         PaddedPODArray<ResultType> distances1(haystack_size + 1, 0);
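A quick sanity check for the hunk above (illustrative, assuming the struct is visible in the translation unit): the classic kitten to sitting pair needs three single-character edits, and anything longer than the now shared max_string_size (64 KiB) throws before the DP runs.

#include <cassert>

void checkEditDistance()
{
    assert(ByteEditDistanceImpl::process("kitten", 6, "sitting", 7) == 3);
}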
@@ -271,6 +273,180 @@ struct ByteEditDistanceImpl
     }
 };
 
+struct ByteDamerauLevenshteinDistanceImpl
+{
+    using ResultType = UInt64;
+
+    static ResultType process(
+        const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size)
+    {
+        /// Safety threshold against DoS
+        if (haystack_size > max_string_size || needle_size > max_string_size)
+            throw Exception(
+                ErrorCodes::TOO_LARGE_STRING_SIZE,
+                "The string size is too big for function damerauLevenshteinDistance, should be at most {}", max_string_size);
+
+        /// Shortcuts:
+
+        if (haystack_size == 0)
+            return needle_size;
+
+        if (needle_size == 0)
+            return haystack_size;
+
+        if (haystack_size == needle_size && memcmp(haystack, needle, haystack_size) == 0)
+            return 0;
+
+        /// Implements the algorithm for optimal string alignment distance from
+        /// https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
+
+        /// Dynamically allocate memory for the 2D array
+        /// Allocating a 2D array, for convenience starts is an array of pointers to the start of the rows.
+        std::vector<int> d((needle_size + 1) * (haystack_size + 1));
+        std::vector<int *> starts(haystack_size + 1);
+
+        /// Setting the pointers in starts to the beginning of (needle_size + 1)-long intervals.
+        /// Also initialize the row values based on the mentioned algorithm.
+        for (size_t i = 0; i <= haystack_size; ++i)
+        {
+            starts[i] = d.data() + (needle_size + 1) * i;
+            starts[i][0] = static_cast<int>(i);
+        }
+
+        for (size_t j = 0; j <= needle_size; ++j)
+        {
+            starts[0][j] = static_cast<int>(j);
+        }
+
+        for (size_t i = 1; i <= haystack_size; ++i)
+        {
+            for (size_t j = 1; j <= needle_size; ++j)
+            {
+                int cost = (haystack[i - 1] == needle[j - 1]) ? 0 : 1;
+                starts[i][j] = std::min(starts[i - 1][j] + 1,           /// deletion
+                               std::min(starts[i][j - 1] + 1,           /// insertion
+                                        starts[i - 1][j - 1] + cost)    /// substitution
+                               );
+                if (i > 1 && j > 1 && haystack[i - 1] == needle[j - 2] && haystack[i - 2] == needle[j - 1])
+                    starts[i][j] = std::min(starts[i][j], starts[i - 2][j - 2] + 1); /// transposition
+            }
+        }
+
+        return starts[haystack_size][needle_size];
+    }
+};
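The struct above implements the restricted (optimal string alignment) variant: adjacent transpositions cost 1, but no substring may be edited twice. An illustrative check (not part of the commit, assuming the struct is in scope):

#include <cassert>

void checkDamerauLevenshtein()
{
    /// A single adjacent transposition costs 1 (plain Levenshtein would need 2).
    assert(ByteDamerauLevenshteinDistanceImpl::process("ab", 2, "ba", 2) == 1);
    /// "ca" -> "abc" is 3 under optimal string alignment, although unrestricted
    /// Damerau-Levenshtein would give 2.
    assert(ByteDamerauLevenshteinDistanceImpl::process("ca", 2, "abc", 3) == 3);
}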
|
|
||||||
|
struct ByteJaroSimilarityImpl
|
||||||
|
{
|
||||||
|
using ResultType = Float64;
|
||||||
|
|
||||||
|
static ResultType process(
|
||||||
|
const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size)
|
||||||
|
{
|
||||||
|
/// Safety threshold against DoS
|
||||||
|
if (haystack_size > max_string_size || needle_size > max_string_size)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::TOO_LARGE_STRING_SIZE,
|
||||||
|
"The string size is too big for function jaroSimilarity, should be at most {}", max_string_size);
|
||||||
|
|
||||||
|
/// Shortcuts:
|
||||||
|
|
||||||
|
if (haystack_size == 0)
|
||||||
|
return needle_size;
|
||||||
|
|
||||||
|
if (needle_size == 0)
|
||||||
|
return haystack_size;
|
||||||
|
|
||||||
|
if (haystack_size == needle_size && memcmp(haystack, needle, haystack_size) == 0)
|
||||||
|
return 1.0;
|
||||||
|
|
||||||
|
const int s1len = static_cast<int>(haystack_size);
|
||||||
|
const int s2len = static_cast<int>(needle_size);
|
||||||
|
|
||||||
|
/// Window size to search for matches in the other string
|
||||||
|
const int max_range = std::max(0, std::max(s1len, s2len) / 2 - 1);
|
||||||
|
std::vector<int> s1_matching(s1len, -1);
|
||||||
|
std::vector<int> s2_matching(s2len, -1);
|
||||||
|
|
||||||
|
/// Calculate matching characters
|
||||||
|
size_t matching_characters = 0;
|
||||||
|
for (int i = 0; i < s1len; i++)
|
||||||
|
{
|
||||||
|
/// Matching window
|
||||||
|
const int min_index = std::max(i - max_range, 0);
|
||||||
|
const int max_index = std::min(i + max_range + 1, s2len);
|
||||||
|
for (int j = min_index; j < max_index; j++)
|
||||||
|
{
|
||||||
|
if (s2_matching[j] == -1 && haystack[i] == needle[j])
|
||||||
|
{
|
||||||
|
s1_matching[i] = i;
|
||||||
|
s2_matching[j] = j;
|
||||||
|
matching_characters++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matching_characters == 0)
|
||||||
|
return 0.0;
|
||||||
|
|
||||||
|
/// Transpositions (one-way only)
|
||||||
|
double transpositions = 0.0;
|
||||||
|
for (size_t i = 0, s1i = 0, s2i = 0; i < matching_characters; i++)
|
||||||
|
{
|
||||||
|
while (s1_matching[s1i] == -1)
|
||||||
|
s1i++;
|
||||||
|
while (s2_matching[s2i] == -1)
|
||||||
|
s2i++;
|
||||||
|
if (haystack[s1i] != needle[s2i])
|
||||||
|
transpositions += 0.5;
|
||||||
|
s1i++;
|
||||||
|
s2i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
double m = static_cast<double>(matching_characters);
|
||||||
|
double jaro_similarity = 1.0 / 3.0 * (m / static_cast<double>(s1len)
|
||||||
|
+ m / static_cast<double>(s2len)
|
||||||
|
+ (m - transpositions) / m);
|
||||||
|
return jaro_similarity;
|
||||||
|
}
|
||||||
|
};
|
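Worked example for the Jaro similarity above (illustrative): "MARTHA" vs "MARHTA" has 6 matching characters and one transposed pair, giving 1/3 * (6/6 + 6/6 + 5/6) = 17/18, roughly 0.944.

#include <cassert>
#include <cmath>

void checkJaro()
{
    double jaro = ByteJaroSimilarityImpl::process("MARTHA", 6, "MARHTA", 6);
    assert(std::fabs(jaro - 17.0 / 18.0) < 1e-9);
}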
+
+struct ByteJaroWinklerSimilarityImpl
+{
+    using ResultType = Float64;
+
+    static ResultType process(
+        const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size)
+    {
+        static constexpr int max_prefix_length = 4;
+        static constexpr double scaling_factor = 0.1;
+        static constexpr double boost_threshold = 0.7;
+
+        /// Safety threshold against DoS
+        if (haystack_size > max_string_size || needle_size > max_string_size)
+            throw Exception(
+                ErrorCodes::TOO_LARGE_STRING_SIZE,
+                "The string size is too big for function jaroWinklerSimilarity, should be at most {}", max_string_size);
+
+        const int s1len = static_cast<int>(haystack_size);
+        const int s2len = static_cast<int>(needle_size);
+
+        ResultType jaro_winkler_similarity = ByteJaroSimilarityImpl::process(haystack, haystack_size, needle, needle_size);
+
+        if (jaro_winkler_similarity > boost_threshold)
+        {
+            const int common_length = std::min(max_prefix_length, std::min(s1len, s2len));
+            int common_prefix = 0;
+            while (common_prefix < common_length && haystack[common_prefix] == needle[common_prefix])
+                common_prefix++;
+
+            jaro_winkler_similarity += common_prefix * scaling_factor * (1.0 - jaro_winkler_similarity);
+        }
+        return jaro_winkler_similarity;
+    }
+};
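Continuing the example (illustrative): the common prefix "MAR" of length 3 lifts the Jaro score 17/18 to 17/18 + 3 * 0.1 * (1 - 17/18) = 173/180, roughly 0.961.

#include <cassert>
#include <cmath>

void checkJaroWinkler()
{
    double jw = ByteJaroWinklerSimilarityImpl::process("MARTHA", 6, "MARHTA", 6);
    assert(std::fabs(jw - 173.0 / 180.0) < 1e-9);
}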
||||||
|
|
||||||
struct NameByteHammingDistance
|
struct NameByteHammingDistance
|
||||||
{
|
{
|
||||||
static constexpr auto name = "byteHammingDistance";
|
static constexpr auto name = "byteHammingDistance";
|
||||||
@ -283,6 +459,12 @@ struct NameEditDistance
|
|||||||
};
|
};
|
||||||
using FunctionEditDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteEditDistanceImpl>, NameEditDistance>;
|
using FunctionEditDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteEditDistanceImpl>, NameEditDistance>;
|
||||||
|
|
||||||
|
struct NameDamerauLevenshteinDistance
|
||||||
|
{
|
||||||
|
static constexpr auto name = "damerauLevenshteinDistance";
|
||||||
|
};
|
||||||
|
using FunctionDamerauLevenshteinDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteDamerauLevenshteinDistanceImpl>, NameDamerauLevenshteinDistance>;
|
||||||
|
|
||||||
struct NameJaccardIndex
|
struct NameJaccardIndex
|
||||||
{
|
{
|
||||||
static constexpr auto name = "stringJaccardIndex";
|
static constexpr auto name = "stringJaccardIndex";
|
||||||
@ -295,6 +477,18 @@ struct NameJaccardIndexUTF8
|
|||||||
};
|
};
|
||||||
using FunctionStringJaccardIndexUTF8 = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteJaccardIndexImpl<true>>, NameJaccardIndexUTF8>;
|
using FunctionStringJaccardIndexUTF8 = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteJaccardIndexImpl<true>>, NameJaccardIndexUTF8>;
|
||||||
|
|
||||||
|
struct NameJaroSimilarity
|
||||||
|
{
|
||||||
|
static constexpr auto name = "jaroSimilarity";
|
||||||
|
};
|
||||||
|
using FunctionJaroSimilarity = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteJaroSimilarityImpl>, NameJaroSimilarity>;
|
||||||
|
|
||||||
|
struct NameJaroWinklerSimilarity
|
||||||
|
{
|
||||||
|
static constexpr auto name = "jaroWinklerSimilarity";
|
||||||
|
};
|
||||||
|
using FunctionJaroWinklerSimilarity = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteJaroWinklerSimilarityImpl>, NameJaroWinklerSimilarity>;
|
||||||
|
|
||||||
REGISTER_FUNCTION(StringDistance)
|
REGISTER_FUNCTION(StringDistance)
|
||||||
{
|
{
|
||||||
factory.registerFunction<FunctionByteHammingDistance>(
|
factory.registerFunction<FunctionByteHammingDistance>(
|
||||||
@ -305,9 +499,18 @@ REGISTER_FUNCTION(StringDistance)
|
|||||||
FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"});
|
FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"});
|
||||||
factory.registerAlias("levenshteinDistance", NameEditDistance::name);
|
factory.registerAlias("levenshteinDistance", NameEditDistance::name);
|
||||||
|
|
||||||
|
factory.registerFunction<FunctionDamerauLevenshteinDistance>(
|
||||||
|
FunctionDocumentation{.description = R"(Calculates the Damerau-Levenshtein distance two between two byte-string.)"});
|
||||||
|
|
||||||
factory.registerFunction<FunctionStringJaccardIndex>(
|
factory.registerFunction<FunctionStringJaccardIndex>(
|
||||||
FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.)"});
|
FunctionDocumentation{.description = R"(Calculates the Jaccard similarity index between two byte strings.)"});
|
||||||
factory.registerFunction<FunctionStringJaccardIndexUTF8>(
|
factory.registerFunction<FunctionStringJaccardIndexUTF8>(
|
||||||
FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two UTF8 strings.)"});
|
FunctionDocumentation{.description = R"(Calculates the Jaccard similarity index between two UTF8 strings.)"});
|
||||||
|
|
||||||
|
factory.registerFunction<FunctionJaroSimilarity>(
|
||||||
|
FunctionDocumentation{.description = R"(Calculates the Jaro similarity between two byte-string.)"});
|
||||||
|
|
||||||
|
factory.registerFunction<FunctionJaroWinklerSimilarity>(
|
||||||
|
FunctionDocumentation{.description = R"(Calculates the Jaro-Winkler similarity between two byte-string.)"});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
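The boost step in the jaroWinklerSimilarity implementation above is easy to check by hand: sim + common_prefix * 0.1 * (1 - sim), with the shared prefix capped at 4 characters and applied only when the plain Jaro similarity exceeds 0.7. Below is a minimal standalone sketch of that formula (illustration only; the helper name jaro_winkler_boost is invented here, and the Jaro value is passed in rather than recomputed):

    #include <algorithm>
    #include <iostream>
    #include <string>

    /// Illustrative only: applies the same prefix boost as the implementation above,
    /// assuming `jaro` already holds the plain Jaro similarity of the two strings.
    static double jaro_winkler_boost(const std::string & a, const std::string & b, double jaro)
    {
        constexpr int max_prefix_length = 4;     /// same cap as above
        constexpr double scaling_factor = 0.1;   /// weight per shared prefix character
        constexpr double boost_threshold = 0.7;  /// only boost reasonably similar strings

        if (jaro <= boost_threshold)
            return jaro;

        const int limit = std::min<int>(max_prefix_length, static_cast<int>(std::min(a.size(), b.size())));
        int common_prefix = 0;
        while (common_prefix < limit && a[common_prefix] == b[common_prefix])
            ++common_prefix;

        return jaro + common_prefix * scaling_factor * (1.0 - jaro);
    }

    int main()
    {
        /// "MARTHA" vs "MARHTA": Jaro is about 0.944 and the shared prefix is "MAR" (3 chars),
        /// so the boosted value is 0.944 + 3 * 0.1 * (1 - 0.944), roughly 0.961.
        std::cout << jaro_winkler_boost("MARTHA", "MARHTA", 0.944) << '\n';
    }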
@ -434,7 +434,7 @@ public:
        };

        FunctionArgumentDescriptors optional_args{
-            {optional_argument_names[0], &isNumber<IDataType>, isColumnConst, "const Number"},
+            {optional_argument_names[0], &isNumber<IDataType>, nullptr, "const Number"},
            {optional_argument_names[1], &isNumber<IDataType>, isColumnConst, "const Number"},
            {optional_argument_names[2], &isString<IDataType>, isColumnConst, "const String"}
        };
@ -143,7 +143,6 @@ public:
      * depending on values of conditions.
      */

-
        std::vector<Instruction> instructions;
        instructions.reserve(arguments.size() / 2 + 1);

@ -238,7 +237,7 @@ public:
        }

        const auto & settings = context->getSettingsRef();
-        const WhichDataType which(result_type);
+        const WhichDataType which(removeNullable(result_type));
        bool execute_multiif_columnar
            = settings.allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat());

@ -254,8 +253,12 @@ public:
    if (which.is##TYPE()) \
    { \
        MutableColumnPtr res = ColumnVector<TYPE>::create(rows); \
-        executeInstructionsColumnar<TYPE, INDEX>(instructions, rows, res); \
-        return std::move(res); \
+        MutableColumnPtr null_map = result_type->isNullable() ? ColumnUInt8::create(rows) : nullptr; \
+        executeInstructionsColumnar<TYPE, INDEX>(instructions, rows, res, null_map, result_type->isNullable()); \
+        if (!result_type->isNullable()) \
+            return std::move(res); \
+        else \
+            return ColumnNullable::create(std::move(res), std::move(null_map)); \
    }

#define ENUMERATE_NUMERIC_TYPES(M, INDEX) \
@ -295,6 +298,7 @@ public:
    }

private:

    static void executeInstructions(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res)
    {
        for (size_t i = 0; i < rows; ++i)
@ -374,17 +378,59 @@ private:
    }

    template <typename T, typename S>
-    static void executeInstructionsColumnar(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res)
+    static void executeInstructionsColumnar(std::vector<Instruction> & instructions, size_t rows, const MutableColumnPtr & res, const MutableColumnPtr & null_map, bool nullable)
    {
        PaddedPODArray<S> inserts(rows, static_cast<S>(instructions.size()));
        calculateInserts(instructions, rows, inserts);

        PaddedPODArray<T> & res_data = assert_cast<ColumnVector<T> &>(*res).getData();
-        for (size_t row_i = 0; row_i < rows; ++row_i)
-        {
-            auto & instruction = instructions[inserts[row_i]];
-            auto ref = instruction.source->getDataAt(row_i);
-            res_data[row_i] = *reinterpret_cast<const T*>(ref.data);
+        if (!nullable)
+        {
+            for (size_t row_i = 0; row_i < rows; ++row_i)
+            {
+                auto & instruction = instructions[inserts[row_i]];
+                auto ref = instruction.source->getDataAt(row_i);
+                res_data[row_i] = *reinterpret_cast<const T*>(ref.data);
+            }
+        }
+        else
+        {
+            PaddedPODArray<UInt8> & null_map_data = assert_cast<ColumnUInt8 &>(*null_map).getData();
+            std::vector<const T*> data_cols(instructions.size());
+            std::vector<const UInt8 *> null_map_cols(instructions.size());
+            ColumnPtr shared_null_map_col = nullptr;
+            for (size_t i = 0; i < instructions.size(); ++i)
+            {
+                if (instructions[i].source->isNullable())
+                {
+                    const ColumnNullable * nullable_col;
+                    if (!instructions[i].source_is_constant)
+                        nullable_col = assert_cast<const ColumnNullable *>(instructions[i].source.get());
+                    else
+                    {
+                        const ColumnPtr data_column = assert_cast<const ColumnConst &>(*instructions[i].source).getDataColumnPtr();
+                        nullable_col = assert_cast<const ColumnNullable *>(data_column.get());
+                    }
+                    null_map_cols[i] = assert_cast<const ColumnUInt8 &>(*nullable_col->getNullMapColumnPtr()).getData().data();
+                    data_cols[i] = assert_cast<const ColumnVector<T> &>(*nullable_col->getNestedColumnPtr()).getData().data();
+                }
+                else
+                {
+                    if (!shared_null_map_col)
+                    {
+                        shared_null_map_col = ColumnUInt8::create(rows, 0);
+                    }
+                    null_map_cols[i] = assert_cast<const ColumnUInt8 &>(*shared_null_map_col).getData().data();
+                    data_cols[i] = assert_cast<const ColumnVector<T> &>(*instructions[i].source).getData().data();
+                }
+            }
+            for (size_t row_i = 0; row_i < rows; ++row_i)
+            {
+                auto & instruction = instructions[inserts[row_i]];
+                size_t index = instruction.source_is_constant ? 0 : row_i;
+                res_data[row_i] = *(data_cols[inserts[row_i]] + index);
+                null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index);
+            }
        }
    }

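The nullable branch added to executeInstructionsColumnar above is, at its core, a per-row gather: pick the winning branch index, then copy that branch's value and null flag from flat arrays into the result. A stripped-down sketch of that idea follows (plain std::vector instead of ClickHouse columns; all names here are invented for illustration):

    #include <cstdint>
    #include <cstddef>
    #include <vector>

    /// chosen[row] is the index of the branch selected for that row;
    /// values[b] / null_flags[b] are the flattened data and null map of branch b.
    static void gather_with_null_map(
        const std::vector<size_t> & chosen,
        const std::vector<std::vector<double>> & values,
        const std::vector<std::vector<uint8_t>> & null_flags,
        std::vector<double> & out_values,
        std::vector<uint8_t> & out_null_map)
    {
        const size_t rows = chosen.size();
        out_values.resize(rows);
        out_null_map.resize(rows);
        for (size_t row = 0; row < rows; ++row)
        {
            const size_t branch = chosen[row];
            out_values[row] = values[branch][row];        /// value from the selected branch
            out_null_map[row] = null_flags[branch][row];  /// its null flag travels with it
        }
    }

In the real code, constant branches read index 0 instead of the row number, and branches that are not Nullable all point at one shared all-zeros null map, which is exactly what shared_null_map_col does above.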
@ -170,7 +170,7 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
}

std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
-    std::unique_ptr<WriteBuffer> nested, CompressionMethod method, int level, size_t buf_size, char * existing_memory, size_t alignment)
+    std::unique_ptr<WriteBuffer> nested, CompressionMethod method, int level, int zstd_window_log, size_t buf_size, char * existing_memory, size_t alignment)
{
    if (method == DB::CompressionMethod::Gzip || method == CompressionMethod::Zlib)
        return std::make_unique<ZlibDeflatingWriteBuffer>(std::move(nested), method, level, buf_size, existing_memory, alignment);
@ -183,7 +183,7 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
        return std::make_unique<LZMADeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);

    if (method == CompressionMethod::Zstd)
-        return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
+        return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, zstd_window_log, buf_size, existing_memory, alignment);

    if (method == CompressionMethod::Lz4)
        return std::make_unique<Lz4DeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
@ -66,6 +66,7 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
    std::unique_ptr<WriteBuffer> nested,
    CompressionMethod method,
    int level,
+    int zstd_window_log = 0,
    size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
    char * existing_memory = nullptr,
    size_t alignment = 0);
@ -3,7 +3,6 @@
#if USE_AWS_S3

#include <aws/core/client/CoreErrors.h>
-#include <aws/core/client/DefaultRetryStrategy.h>
#include <aws/s3/model/HeadBucketRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/HeadObjectRequest.h>
@ -15,7 +14,6 @@

#include <Poco/Net/NetException.h>

-#include <IO/S3Common.h>
#include <IO/S3/Requests.h>
#include <IO/S3/PocoHTTPClientFactory.h>
#include <IO/S3/AWSLogger.h>
@ -37,6 +35,9 @@ namespace ProfileEvents

    extern const Event DiskS3WriteRequestsErrors;
    extern const Event DiskS3ReadRequestsErrors;

+    extern const Event S3Clients;
+    extern const Event TinyS3Clients;
}

namespace DB
@ -199,6 +200,8 @@ Client::Client(

    cache = std::make_shared<ClientCache>();
    ClientCacheRegistry::instance().registerClient(cache);

+    ProfileEvents::increment(ProfileEvents::S3Clients);
}

Client::Client(
@ -219,6 +222,22 @@ Client::Client(
{
    cache = std::make_shared<ClientCache>(*other.cache);
    ClientCacheRegistry::instance().registerClient(cache);

+    ProfileEvents::increment(ProfileEvents::TinyS3Clients);
+}
+
+Client::~Client()
+{
+    try
+    {
+        ClientCacheRegistry::instance().unregisterClient(cache.get());
+    }
+    catch (...)
+    {
+        tryLogCurrentException(log);
+        throw;
+    }
}

Aws::Auth::AWSCredentials Client::getCredentials() const
@ -142,18 +142,7 @@ public:
    Client(Client && other) = delete;
    Client & operator=(Client &&) = delete;

-    ~Client() override
-    {
-        try
-        {
-            ClientCacheRegistry::instance().unregisterClient(cache.get());
-        }
-        catch (...)
-        {
-            tryLogCurrentException(log);
-            throw;
-        }
-    }
+    ~Client() override;

    /// Returns the initial endpoint.
    const String & getInitialEndpoint() const { return initial_endpoint; }
@ -170,7 +159,7 @@ public:
    class RetryStrategy : public Aws::Client::RetryStrategy
    {
    public:
-        RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000);
+        explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000);

        /// NOLINTNEXTLINE(google-runtime-int)
        bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error, long attemptedRetries) const override;
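The destructor that this hunk moves out of the header (its body is visible in the removed lines above) follows a common register-in-constructor, unregister-in-destructor pattern, with the unregister call wrapped so that a failure is at least logged. A self-contained sketch of the shape of that pattern (Registry and CachedClient are placeholders invented here, not the commit's ClientCacheRegistry or Client; this sketch only logs, while the commit's destructor also rethrows):

    #include <iostream>
    #include <set>

    /// Placeholder registry; the real code tracks client caches via ClientCacheRegistry.
    struct Registry
    {
        static Registry & instance() { static Registry r; return r; }
        void registerClient(const void * p) { clients.insert(p); }
        void unregisterClient(const void * p) { clients.erase(p); }
        std::set<const void *> clients;
    };

    struct CachedClient
    {
        CachedClient() { Registry::instance().registerClient(this); }
        ~CachedClient()
        {
            try
            {
                Registry::instance().unregisterClient(this);
            }
            catch (...)
            {
                /// Log so the failure is not silently swallowed.
                std::cerr << "unregisterClient failed\n";
            }
        }
    };

    int main()
    {
        CachedClient client; /// registered here, unregistered when it goes out of scope
    }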
@ -6,21 +6,12 @@

#if USE_AWS_S3

-# include <Common/quoteString.h>
-# include <IO/WriteBufferFromString.h>
# include <IO/HTTPHeaderEntries.h>
-# include <Storages/StorageS3Settings.h>

-# include <IO/S3/PocoHTTPClientFactory.h>
-# include <IO/S3/PocoHTTPClient.h>
# include <IO/S3/Client.h>
-# include <IO/S3/URI.h>
# include <IO/S3/Requests.h>
-# include <IO/S3/Credentials.h>
+# include <Common/quoteString.h>
# include <Common/logger_useful.h>

-# include <fstream>

namespace ProfileEvents
{
@ -147,6 +138,12 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const
    };
}

+bool AuthSettings::hasUpdates(const AuthSettings & other) const
+{
+    AuthSettings copy = *this;
+    copy.updateFrom(other);
+    return *this != copy;
+}
+
void AuthSettings::updateFrom(const AuthSettings & from)
{
@ -175,7 +172,7 @@ void AuthSettings::updateFrom(const AuthSettings & from)
        expiration_window_seconds = from.expiration_window_seconds;

    if (from.no_sign_request.has_value())
-        no_sign_request = *from.no_sign_request;
+        no_sign_request = from.no_sign_request;
}

}
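AuthSettings::hasUpdates above answers the question "would applying these settings change anything?" by copying the current settings, applying updateFrom to the copy, and comparing. The nice property is that it stays correct no matter which fields updateFrom learns to merge later. The same idiom in a self-contained form (a toy Settings struct, not the commit's type; needs C++20 for the defaulted comparison):

    #include <iostream>
    #include <optional>
    #include <string>

    struct Settings
    {
        std::optional<std::string> region;
        std::optional<bool> no_sign_request;

        bool operator==(const Settings &) const = default;

        void updateFrom(const Settings & from)
        {
            /// Only fields that are set in `from` override the current values.
            if (from.region.has_value())
                region = from.region;
            if (from.no_sign_request.has_value())
                no_sign_request = from.no_sign_request;
        }

        bool hasUpdates(const Settings & other) const
        {
            Settings copy = *this;   /// apply the candidate changes to a copy...
            copy.updateFrom(other);
            return *this != copy;    /// ...and check whether anything actually changed
        }
    };

    int main()
    {
        Settings current{.region = "eu-west-1", .no_sign_request = false};
        Settings incoming{.region = "eu-west-1"};
        std::cout << std::boolalpha << current.hasUpdates(incoming) << '\n'; /// false, nothing new
        incoming.no_sign_request = true;
        std::cout << current.hasUpdates(incoming) << '\n';                   /// true
    }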
@ -92,9 +92,11 @@ struct AuthSettings
    std::optional<uint64_t> expiration_window_seconds;
    std::optional<bool> no_sign_request;

-    bool operator==(const AuthSettings & other) const = default;
+    bool hasUpdates(const AuthSettings & other) const;

    void updateFrom(const AuthSettings & from);

+private:
+    bool operator==(const AuthSettings & other) const = default;
};

}
@ -1,30 +1,51 @@
#include <IO/ZstdDeflatingWriteBuffer.h>
#include <Common/Exception.h>
+#include <IO/WriteHelpers.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int ZSTD_ENCODER_FAILED;
+    extern const int ILLEGAL_CODEC_PARAMETER;
+}
+
+static void setZstdParameter(ZSTD_CCtx * cctx, ZSTD_cParameter param, int value)
+{
+    auto ret = ZSTD_CCtx_setParameter(cctx, param, value);
+    if (ZSTD_isError(ret))
+        throw Exception(
+            ErrorCodes::ZSTD_ENCODER_FAILED,
+            "zstd stream encoder option setting failed: error code: {}; zstd version: {}",
+            ret,
+            ZSTD_VERSION_STRING);
}

ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer(
-    std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
+    std::unique_ptr<WriteBuffer> out_, int compression_level, int window_log, size_t buf_size, char * existing_memory, size_t alignment)
    : WriteBufferWithOwnMemoryDecorator(std::move(out_), buf_size, existing_memory, alignment)
{
    cctx = ZSTD_createCCtx();
    if (cctx == nullptr)
        throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING);
-    size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level);
-    if (ZSTD_isError(ret))
-        throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED,
-            "zstd stream encoder option setting failed: error code: {}; zstd version: {}",
-            ret, ZSTD_VERSION_STRING);
-    ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
-    if (ZSTD_isError(ret))
-        throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED,
-            "zstd stream encoder option setting failed: error code: {}; zstd version: {}",
-            ret, ZSTD_VERSION_STRING);
+    setZstdParameter(cctx, ZSTD_c_compressionLevel, compression_level);
+
+    if (window_log > 0)
+    {
+        ZSTD_bounds window_log_bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
+        if (ZSTD_isError(window_log_bounds.error))
+            throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "ZSTD windowLog parameter is not supported {}",
+                std::string(ZSTD_getErrorName(window_log_bounds.error)));
+        if (window_log > window_log_bounds.upperBound || window_log < window_log_bounds.lowerBound)
+            throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER,
+                "ZSTD codec can't have window log more than {} and lower than {}, given {}",
+                toString(window_log_bounds.upperBound),
+                toString(window_log_bounds.lowerBound), toString(window_log));
+        setZstdParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
+        setZstdParameter(cctx, ZSTD_c_windowLog, window_log);
+    }
+
+    setZstdParameter(cctx, ZSTD_c_checksumFlag, 1);
+
    input = {nullptr, 0, 0};
    output = {nullptr, 0, 0};
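The window-log handling added above maps directly onto the public libzstd API: query the legal range with ZSTD_cParam_getBounds, reject out-of-range values, then enable long-distance matching together with the explicit window size. A minimal standalone sketch of that sequence, assuming only libzstd (error handling collapsed to a bool; all the ClickHouse buffer plumbing omitted):

    #include <zstd.h>
    #include <cstdio>

    /// Same order of operations as the constructor above: bounds check first,
    /// then ZSTD_c_enableLongDistanceMatching, then ZSTD_c_windowLog.
    static bool configure_window_log(ZSTD_CCtx * cctx, int window_log)
    {
        if (window_log <= 0)
            return true; /// nothing requested, keep zstd defaults

        const ZSTD_bounds bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
        if (ZSTD_isError(bounds.error))
            return false;
        if (window_log < bounds.lowerBound || window_log > bounds.upperBound)
            return false;

        if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)))
            return false;
        return !ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, window_log));
    }

    int main()
    {
        ZSTD_CCtx * cctx = ZSTD_createCCtx();
        if (!configure_window_log(cctx, 27)) /// 27 means a 128 MiB window, as an example
            std::fprintf(stderr, "window log rejected\n");
        ZSTD_freeCCtx(cctx);
    }

Larger windows mainly help when the output is one long stream with far-apart repetitions, which is why the value is exposed as a setting here rather than hard-coded.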
@ -17,6 +17,7 @@ public:
    ZstdDeflatingWriteBuffer(
        std::unique_ptr<WriteBuffer> out_,
        int compression_level,
+        int window_log = 0,
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        char * existing_memory = nullptr,
        size_t alignment = 0);
@ -46,6 +46,9 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<Tabl
        auto inner_hash_join = std::make_shared<InternalHashJoin>();

        inner_hash_join->data = std::make_unique<HashJoin>(table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", i));
+        /// Non zero `max_joined_block_rows` allows to process block partially and return not processed part.
+        /// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature.
+        inner_hash_join->data->setMaxJoinedBlockRows(0);
        hash_joins.emplace_back(std::move(inner_hash_join));
    }
}
@ -1583,9 +1583,7 @@ bool Context::hasScalar(const String & name) const
void Context::addQueryAccessInfo(
    const String & quoted_database_name,
    const String & full_quoted_table_name,
-    const Names & column_names,
-    const String & projection_name,
-    const String & view_name)
+    const Names & column_names)
{
    if (isGlobalContext())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
@ -1593,12 +1591,9 @@ void Context::addQueryAccessInfo(
    std::lock_guard lock(query_access_info.mutex);
    query_access_info.databases.emplace(quoted_database_name);
    query_access_info.tables.emplace(full_quoted_table_name);

    for (const auto & column_name : column_names)
        query_access_info.columns.emplace(full_quoted_table_name + "." + backQuoteIfNeed(column_name));
-    if (!projection_name.empty())
-        query_access_info.projections.emplace(full_quoted_table_name + "." + backQuoteIfNeed(projection_name));
-    if (!view_name.empty())
-        query_access_info.views.emplace(view_name);
}

void Context::addQueryAccessInfo(const Names & partition_names)
@ -1611,6 +1606,15 @@ void Context::addQueryAccessInfo(const Names & partition_names)
        query_access_info.partitions.emplace(partition_name);
}

+void Context::addViewAccessInfo(const String & view_name)
+{
+    if (isGlobalContext())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
+
+    std::lock_guard<std::mutex> lock(query_access_info.mutex);
+    query_access_info.views.emplace(view_name);
+}
+
void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name)
{
    if (!qualified_projection_name)
@ -693,13 +693,14 @@ public:
    void addSpecialScalar(const String & name, const Block & block);

    const QueryAccessInfo & getQueryAccessInfo() const { return query_access_info; }

    void addQueryAccessInfo(
        const String & quoted_database_name,
        const String & full_quoted_table_name,
-        const Names & column_names,
-        const String & projection_name = {},
-        const String & view_name = {});
+        const Names & column_names);
    void addQueryAccessInfo(const Names & partition_names);
+    void addViewAccessInfo(const String & view_name);

    struct QualifiedProjectionName
    {
@ -707,8 +708,8 @@ public:
        String projection_name;
        explicit operator bool() const { return !projection_name.empty(); }
    };
-    void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name);

+    void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name);

    /// Supported factories for records in query_log
    enum class QueryLogFactories
@ -1050,7 +1050,7 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(
        join_element.table_expression,
        context,
        original_right_column_names,
-        query_options.copy().setWithAllColumns().ignoreProjections(false).ignoreAlias(false));
+        query_options.copy().setWithAllColumns().ignoreAlias(false));
    auto joined_plan = std::make_unique<QueryPlan>();
    interpreter->buildQueryPlan(*joined_plan);
    {
@ -243,6 +243,7 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
    , asof_inequality(table_join->getAsofInequality())
    , data(std::make_shared<RightTableData>())
    , right_sample_block(right_sample_block_)
+    , max_joined_block_rows(table_join->maxJoinedBlockRows())
    , instance_log_id(!instance_id_.empty() ? "(" + instance_id_ + ") " : "")
    , log(&Poco::Logger::get("HashJoin"))
{
@ -1401,7 +1402,7 @@ NO_INLINE size_t joinRightColumns(
            {
                if constexpr (join_features.need_replication)
                {
-                    if (unlikely(current_offset > max_joined_block_rows))
+                    if (unlikely(current_offset >= max_joined_block_rows))
                    {
                        added_columns.offsets_to_replicate->resize_assume_reserved(i);
                        added_columns.filter.resize_assume_reserved(i);
@ -1690,7 +1691,7 @@ Block HashJoin::joinBlockImpl(

    bool has_required_right_keys = (required_right_keys.columns() != 0);
    added_columns.need_filter = join_features.need_filter || has_required_right_keys;
-    added_columns.max_joined_block_rows = table_join->maxJoinedBlockRows();
+    added_columns.max_joined_block_rows = max_joined_block_rows;
    if (!added_columns.max_joined_block_rows)
        added_columns.max_joined_block_rows = std::numeric_limits<size_t>::max();
    else
@ -1771,7 +1772,6 @@ Block HashJoin::joinBlockImpl(

void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const
{
-    size_t max_joined_block_rows = table_join->maxJoinedBlockRows();
    size_t start_left_row = 0;
    size_t start_right_block = 0;
    if (not_processed)
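Two details in the HashJoin changes above deserve a note: a max_joined_block_rows of 0 is widened to "unlimited", and the cut-off condition is now >= so a block is cut as soon as the limit is reached instead of one step after. A toy restatement of that decision (names invented for illustration):

    #include <cassert>
    #include <cstddef>
    #include <limits>

    /// 0 means "no limit", mirroring how the join code widens it to the maximum size_t.
    static size_t effective_row_limit(size_t max_joined_block_rows)
    {
        return max_joined_block_rows == 0 ? std::numeric_limits<size_t>::max() : max_joined_block_rows;
    }

    /// With >=, the produced block never exceeds the limit; the previous > could overshoot.
    static bool should_cut_block(size_t current_offset, size_t max_joined_block_rows)
    {
        return current_offset >= effective_row_limit(max_joined_block_rows);
    }

    int main()
    {
        assert(!should_cut_block(10, 0));        /// unlimited
        assert(should_cut_block(65536, 65536));  /// cut exactly at the limit
        assert(!should_cut_block(65535, 65536));
    }

ConcurrentHashJoin above sets the limit to 0 explicitly because, per the added comment, partially processed blocks are not handled on that path yet.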
@ -396,6 +396,8 @@ public:

    void shrinkStoredBlocksToFit(size_t & total_bytes_in_join);

+    void setMaxJoinedBlockRows(size_t value) { max_joined_block_rows = value; }
+
private:
    template<bool> friend class NotJoinedHash;

@ -433,6 +435,9 @@ private:
    /// Left table column names that are sources for required_right_keys columns
    std::vector<String> required_right_keys_sources;

+    /// Maximum number of rows in result block. If it is 0, then no limits.
+    size_t max_joined_block_rows = 0;
+
    /// When tracked memory consumption is more than a threshold, we will shrink to fit stored blocks.
    bool shrink_blocks = false;
    Int64 memory_usage_before_adding_blocks = 0;
@ -596,6 +596,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
    bool sanity_check_compression_codecs = !attach && !context_->getSettingsRef().allow_suspicious_codecs;
    bool allow_experimental_codecs = attach || context_->getSettingsRef().allow_experimental_codecs;
    bool enable_deflate_qpl_codec = attach || context_->getSettingsRef().enable_deflate_qpl_codec;
+    bool enable_zstd_qat_codec = attach || context_->getSettingsRef().enable_zstd_qat_codec;

    ColumnsDescription res;
    auto name_type_it = column_names_and_types.begin();
@ -656,7 +657,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
            if (col_decl.default_specifier == "ALIAS")
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS");
            column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(
-                col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec);
+                col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
        }

        if (col_decl.stat_type)
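The new enable_zstd_qat_codec flag above follows the same gate as the other experimental codec switches: during ATTACH the codec is always accepted, since the table definition already exists, otherwise it is allowed only when the corresponding user setting is on. A compact sketch of that gate (check_codec_allowed is a hypothetical helper, not the factory's real API):

    #include <stdexcept>
    #include <string>

    /// Hypothetical helper mirroring the attach-or-setting gate used for experimental codecs.
    static void check_codec_allowed(const std::string & codec_name, bool attach, bool setting_enabled)
    {
        if (!(attach || setting_enabled))
            throw std::runtime_error(
                "Codec " + codec_name + " is disabled; enable the corresponding setting to use it in CREATE TABLE");
    }

    int main()
    {
        check_codec_allowed("ZSTD_QAT", /*attach=*/ true, /*setting_enabled=*/ false);  /// ok: ATTACH always passes
        check_codec_allowed("ZSTD_QAT", /*attach=*/ false, /*setting_enabled=*/ true);  /// ok: setting enabled
        /// check_codec_allowed("ZSTD_QAT", false, false) would throw
    }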
@ -390,8 +390,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    if (!prepared_sets)
        prepared_sets = std::make_shared<PreparedSets>();

-    query_info.ignore_projections = options.ignore_projections;
-    query_info.is_projection_query = options.is_projection_query;
    query_info.is_internal = options.is_internal;

    initSettings();
@ -417,7 +415,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    }

    query_info.query = query_ptr->clone();
-    query_info.original_query = query_ptr->clone();

    if (settings.count_distinct_optimization)
    {
@ -856,9 +853,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
        analysis_result.required_columns = required_columns;
    }

-    if (query_info.projection)
-        storage_snapshot->addProjection(query_info.projection->desc);
-
    /// Blocks used in expression analysis contains size 1 const columns for constant folding and
    /// null non-const columns to avoid useless memory allocations. However, a valid block sample
    /// requires all columns to be of size 0, thus we need to sanitize the block here.
@ -965,10 +959,7 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan)
    executeImpl(query_plan, std::move(input_pipe));

    /// We must guarantee that result structure is the same as in getSampleBlock()
-    ///
-    /// But if it's a projection query, plan header does not match result_header.
-    /// TODO: add special stage for InterpreterSelectQuery?
-    if (!options.is_projection_query && !blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
+    if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
    {
        auto convert_actions_dag = ActionsDAG::makeConvertingActions(
            query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
@ -1476,12 +1467,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
    if (expressions.hasHaving() && query.group_by_with_totals && (query.group_by_with_rollup || query.group_by_with_cube))
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING");

-    if (query_info.projection && query_info.projection->desc->type == ProjectionDescription::Type::Aggregate)
-    {
-        query_info.projection->aggregate_overflow_row = aggregate_overflow_row;
-        query_info.projection->aggregate_final = aggregate_final;
-    }
-
    if (options.only_analyze)
    {
        auto read_nothing = std::make_unique<ReadNothingStep>(source_header);
@ -1550,11 +1535,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
        LOG_TRACE(log, "{} -> {}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage));
    }

-    if (query_info.projection && query_info.projection->input_order_info && query_info.input_order_info)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "InputOrderInfo is set for projection and for query");
    InputOrderInfoPtr input_order_info_for_order;
    if (!expressions.need_aggregate)
-        input_order_info_for_order = query_info.projection ? query_info.projection->input_order_info : query_info.input_order_info;
+        input_order_info_for_order = query_info.input_order_info;

    if (options.to_stage > QueryProcessingStage::FetchColumns)
    {
@ -1615,7 +1598,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
        {
            // If there is a storage that supports prewhere, this will always be nullptr
            // Thus, we don't actually need to check if projection is active.
-            if (!query_info.projection && expressions.filter_info)
+            if (expressions.filter_info)
            {
                auto row_level_security_step = std::make_unique<FilterStep>(
                    query_plan.getCurrentDataStream(),
@ -1789,7 +1772,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
            }
        }

-        if (!query_info.projection && expressions.hasWhere())
+        if (expressions.hasWhere())
            executeWhere(query_plan, expressions.before_where, expressions.remove_where_filter);

        if (expressions.need_aggregate)
@ -2057,15 +2040,13 @@ static void executeMergeAggregatedImpl(
    query_plan.addStep(std::move(merging_aggregated));
}

-void InterpreterSelectQuery::addEmptySourceToQueryPlan(
-    QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info, const ContextPtr & context_)
+void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info)
{
    Pipe pipe(std::make_shared<NullSource>(source_header));

-    PrewhereInfoPtr prewhere_info_ptr = query_info.projection ? query_info.projection->prewhere_info : query_info.prewhere_info;
-    if (prewhere_info_ptr)
+    if (query_info.prewhere_info)
    {
-        auto & prewhere_info = *prewhere_info_ptr;
+        auto & prewhere_info = *query_info.prewhere_info;

        if (prewhere_info.row_level_filter)
        {
@ -2088,50 +2069,6 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(
    auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
    read_from_pipe->setStepDescription("Read from NullSource");
    query_plan.addStep(std::move(read_from_pipe));
-
-    if (query_info.projection)
-    {
-        if (query_info.projection->before_where)
-        {
-            auto where_step = std::make_unique<FilterStep>(
-                query_plan.getCurrentDataStream(),
-                query_info.projection->before_where,
-                query_info.projection->where_column_name,
-                query_info.projection->remove_where_filter);
-
-            where_step->setStepDescription("WHERE");
-            query_plan.addStep(std::move(where_step));
-        }
-
-        if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate)
-        {
-            if (query_info.projection->before_aggregation)
-            {
-                auto expression_before_aggregation
-                    = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), query_info.projection->before_aggregation);
-                expression_before_aggregation->setStepDescription("Before GROUP BY");
-                query_plan.addStep(std::move(expression_before_aggregation));
-            }
-
-            // Let's just choose the safe option since we don't know the value of `to_stage` here.
-            const bool should_produce_results_in_order_of_bucket_number = true;
-
-            // It is used to determine if we should use memory bound merging strategy. Maybe it makes sense for projections, but so far this case is just left untouched.
-            SortDescription group_by_sort_description;
-
-            executeMergeAggregatedImpl(
-                query_plan,
-                query_info.projection->aggregate_overflow_row,
-                query_info.projection->aggregate_final,
-                false,
-                false,
-                context_->getSettingsRef(),
-                query_info.projection->aggregation_keys,
-                query_info.projection->aggregate_descriptions,
-                should_produce_results_in_order_of_bucket_number,
-                std::move(group_by_sort_description));
-        }
-    }
}

RowPolicyFilterPtr InterpreterSelectQuery::getRowPolicyFilter() const
@ -2575,80 +2512,47 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc

        /// Create optimizer with prepared actions.
        /// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.
-        if ((optimize_read_in_order || optimize_aggregation_in_order)
-            && (!query_info.projection || query_info.projection->complete))
+        if (optimize_read_in_order)
        {
-            if (optimize_read_in_order)
-            {
-                if (query_info.projection)
-                {
-                    query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
-                        // TODO Do we need a projection variant for this field?
-                        query,
-                        analysis_result.order_by_elements_actions,
-                        getSortDescription(query, context),
-                        query_info.syntax_analyzer_result);
-                }
-                else
-                {
-                    query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
-                        query,
-                        analysis_result.order_by_elements_actions,
-                        getSortDescription(query, context),
-                        query_info.syntax_analyzer_result);
-                }
-            }
-            else if (optimize_aggregation_in_order)
-            {
-                if (query_info.projection)
-                {
-                    query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
-                        query,
-                        query_info.projection->group_by_elements_actions,
-                        query_info.projection->group_by_elements_order_descr,
-                        query_info.syntax_analyzer_result);
-                }
-                else
-                {
-                    query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
-                        query,
-                        analysis_result.group_by_elements_actions,
-                        getSortDescriptionFromGroupBy(query),
-                        query_info.syntax_analyzer_result);
-                }
-            }
+            query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
+                query,
+                analysis_result.order_by_elements_actions,
+                getSortDescription(query, context),
+                query_info.syntax_analyzer_result);

            /// If we don't have filtration, we can pushdown limit to reading stage for optimizations.
-            UInt64 limit = (query.hasFiltration() || query.groupBy()) ? 0 : getLimitForSorting(query, context);
-            if (query_info.projection)
-                query_info.projection->input_order_info
-                    = query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context, limit);
-            else
-                query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit);
+            UInt64 limit = query.hasFiltration() ? 0 : getLimitForSorting(query, context);
+            query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit);
+        }
+        else if (optimize_aggregation_in_order)
+        {
+            query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
+                query,
+                analysis_result.group_by_elements_actions,
+                getSortDescriptionFromGroupBy(query),
+                query_info.syntax_analyzer_result);
+
+            query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, /*limit=*/ 0);
        }

        query_info.storage_limits = std::make_shared<StorageLimitsList>(storage_limits);

        query_info.settings_limit_offset_done = options.settings_limit_offset_done;
        storage->read(query_plan, required_columns, storage_snapshot, query_info, context, processing_stage, max_block_size, max_streams);

        if (context->hasQueryContext() && !options.is_internal)
        {
-            const String view_name{};
            auto local_storage_id = storage->getStorageID();
            context->getQueryContext()->addQueryAccessInfo(
                backQuoteIfNeed(local_storage_id.getDatabaseName()),
                local_storage_id.getFullTableName(),
-                required_columns,
-                query_info.projection ? query_info.projection->desc->name : "",
-                view_name);
+                required_columns);
        }

        /// Create step which reads from empty source if storage has no data.
        if (!query_plan.isInitialized())
        {
            auto header = storage_snapshot->getSampleBlockForColumns(required_columns);
-            addEmptySourceToQueryPlan(query_plan, header, query_info, context);
+            addEmptySourceToQueryPlan(query_plan, header, query_info);
        }
    }
    else
@ -2757,13 +2661,8 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
        expression_before_aggregation->setStepDescription("Before GROUP BY");
        query_plan.addStep(std::move(expression_before_aggregation));

-        if (options.is_projection_query)
-            return;
-
    AggregateDescriptions aggregates = query_analyzer->aggregates();

    const Settings & settings = context->getSettingsRef();

    const auto & keys = query_analyzer->aggregationKeys().getNames();

    auto aggregator_params = getAggregatorParams(
@ -2827,13 +2726,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac

void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets)
{
-    /// If aggregate projection was chosen for table, avoid adding MergeAggregated.
-    /// It is already added by storage (because of performance issues).
-    /// TODO: We should probably add another one processing stage for storage?
-    /// WithMergeableStateAfterAggregation is not ok because, e.g., it skips sorting after aggregation.
-    if (query_info.projection && query_info.projection->desc->type == ProjectionDescription::Type::Aggregate)
-        return;
-
    const Settings & settings = context->getSettingsRef();

    /// Used to determine if we should use memory bound merging strategy.
@ -117,7 +117,7 @@ public:
    bool hasAggregation() const { return query_analyzer->hasAggregation(); }

    static void addEmptySourceToQueryPlan(
-        QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info, const ContextPtr & context_);
+        QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info);

    Names getRequiredColumns() { return required_columns; }

@ -190,7 +190,7 @@ bool isStorageTouchedByMutations(
    if (context->getSettingsRef().allow_experimental_analyzer)
    {
        auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage.shared_from_this(), context);
-        InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits().ignoreProjections());
+        InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits());
        io = interpreter.execute();
    }
    else
@ -200,7 +200,7 @@ bool isStorageTouchedByMutations(
        /// For some reason it may copy context and give it into ExpressionTransform
        /// after that we will use context from destroyed stack frame in our stream.
        interpreter_select_query.emplace(
-            select_query, context, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections());
+            select_query, context, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits());

        io = interpreter_select_query->execute();
    }
@ -404,7 +404,7 @@ MutationsInterpreter::MutationsInterpreter(
    , available_columns(std::move(available_columns_))
    , context(Context::createCopy(context_))
    , settings(std::move(settings_))
-    , select_limits(SelectQueryOptions().analyze(!settings.can_execute).ignoreLimits().ignoreProjections())
+    , select_limits(SelectQueryOptions().analyze(!settings.can_execute).ignoreLimits())
{
    prepare(!settings.can_execute);
}
@ -33,14 +33,6 @@ struct SelectQueryOptions
|
|||||||
bool remove_duplicates = false;
|
bool remove_duplicates = false;
|
||||||
bool ignore_quota = false;
|
bool ignore_quota = false;
|
||||||
bool ignore_limits = false;
|
bool ignore_limits = false;
|
||||||
/// This flag is needed to analyze query ignoring table projections.
|
|
||||||
/// It is needed because we build another one InterpreterSelectQuery while analyzing projections.
|
|
||||||
/// It helps to avoid infinite recursion.
|
|
||||||
bool ignore_projections = false;
|
|
||||||
/// This flag is also used for projection analysis.
|
|
||||||
/// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform.
|
|
||||||
/// It is also used to avoid adding aggregating step when aggregate projection is chosen.
|
|
||||||
bool is_projection_query = false;
|
|
||||||
/// This flag is needed for projection description.
|
/// This flag is needed for projection description.
|
||||||
/// Otherwise, keys for GROUP BY may be removed as constants.
|
/// Otherwise, keys for GROUP BY may be removed as constants.
|
||||||
bool ignore_ast_optimizations = false;
|
bool ignore_ast_optimizations = false;
|
||||||
@ -119,18 +111,6 @@ struct SelectQueryOptions
         return *this;
     }

-    SelectQueryOptions & ignoreProjections(bool value = true)
-    {
-        ignore_projections = value;
-        return *this;
-    }
-
-    SelectQueryOptions & projectionQuery(bool value = true)
-    {
-        is_projection_query = value;
-        return *this;
-    }
-
     SelectQueryOptions & ignoreAlias(bool value = true)
     {
         ignore_alias = value;
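For context on the setters deleted above: SelectQueryOptions is a plain options struct whose setters each flip one flag and return *this, so call sites chain them fluently. A minimal sketch of that pattern, reduced to two of the flags visible in the surrounding hunks (a simplified stand-in, not the full ClickHouse definition):

    struct SelectQueryOptions
    {
        bool ignore_limits = false;
        bool ignore_alias = false;

        /// Each setter flips one flag and returns *this so options can be chained.
        SelectQueryOptions & ignoreLimits(bool value = true) { ignore_limits = value; return *this; }
        SelectQueryOptions & ignoreAlias(bool value = true) { ignore_alias = value; return *this; }
    };

    /// Call sites such as the mutation-interpreter hunks above now simply drop one link in the chain:
    auto options = SelectQueryOptions().ignoreLimits();   // previously .ignoreLimits().ignoreProjections()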
@ -1435,11 +1435,12 @@ void executeQuery(
             const auto & compression_method_node = ast_query_with_output->compression->as<ASTLiteral &>();
             compression_method = compression_method_node.value.safeGet<std::string>();
         }
+        const auto & settings = context->getSettingsRef();
         compressed_buffer = wrapWriteBufferWithCompressionMethod(
             std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
             chooseCompressionMethod(out_file, compression_method),
-            /* compression level = */ 3
+            /* compression level = */ static_cast<int>(settings.output_format_compression_level),
+            /* zstd_window_log = */ static_cast<int>(settings.output_format_compression_zstd_window_log)
         );
     }
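The hunk above replaces the hardcoded compression level 3 with two settings, output_format_compression_level and output_format_compression_zstd_window_log, read from the query context. As a rough illustration of what those two knobs correspond to at the codec layer, here is how the analogous parameters are set through the plain zstd API; this is an assumption for illustration only — ClickHouse goes through its own WriteBuffer wrappers rather than calling zstd directly like this:

    #include <zstd.h>
    #include <cstddef>

    /// Compress `src` into `dst` with an explicit level and window log, mirroring the two
    /// settings read in the hunk above. Error handling is omitted for brevity.
    size_t compressWithZstd(void * dst, size_t dst_capacity, const void * src, size_t src_size,
                            int level, int window_log)
    {
        ZSTD_CCtx * cctx = ZSTD_createCCtx();
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, window_log);
        size_t written = ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
        ZSTD_freeCCtx(cctx);
        return written;   /// check ZSTD_isError(written) in real code
    }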
@ -846,9 +846,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
             query_context->getQueryContext()->addQueryAccessInfo(
                 backQuoteIfNeed(local_storage_id.getDatabaseName()),
                 local_storage_id.getFullTableName(),
-                columns_names,
-                {},
-                {});
+                columns_names);
         }
     }
@ -455,8 +455,7 @@ QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTyp
 SelectQueryInfo buildSelectQueryInfo(const QueryTreeNodePtr & query_tree, const PlannerContextPtr & planner_context)
 {
     SelectQueryInfo select_query_info;
-    select_query_info.original_query = queryNodeToSelectQuery(query_tree);
-    select_query_info.query = select_query_info.original_query;
+    select_query_info.query = queryNodeToSelectQuery(query_tree);
     select_query_info.query_tree = query_tree;
     select_query_info.planner_context = planner_context;
     return select_query_info;
@ -603,18 +603,21 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
         memcpy(buf, istr.position(), bytes_to_copy);
         buf[bytes_to_copy] = 0;

-        char * pos_double = buf;
+        /// Skip leading zeroes - we don't want any funny octal business
+        char * non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy);
+
+        char * pos_double = non_zero_buf;
         errno = 0;
-        Float64 float_value = std::strtod(buf, &pos_double);
-        if (pos_double == buf || errno == ERANGE || float_value < 0)
+        Float64 float_value = std::strtod(non_zero_buf, &pos_double);
+        if (pos_double == non_zero_buf || errno == ERANGE || float_value < 0)
             return false;

         if (negative)
             float_value = -float_value;

-        char * pos_integer = buf;
+        char * pos_integer = non_zero_buf;
         errno = 0;
-        UInt64 uint_value = std::strtoull(buf, &pos_integer, 0);
+        UInt64 uint_value = std::strtoull(non_zero_buf, &pos_integer, 0);
         if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63)))
         {
             istr.position() += pos_integer - buf;
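The leading-zero skip added above matters because std::strtoull is called with base 0, i.e. an auto-detected base, where a leading '0' selects octal. A small standalone check of the behaviour the new comment ("no funny octal business") guards against, using ordinary C standard-library semantics:

    #include <cassert>
    #include <cstdlib>

    int main()
    {
        char * end = nullptr;

        /// With base 0 the base is auto-detected, and a leading '0' selects octal:
        /// "010" parses as 8, not 10.
        assert(std::strtoull("010", &end, 0) == 8);

        /// Skipping the leading zeroes first (the patch uses find_first_not_symbols<'0'>)
        /// leaves "10", which parses as the intended decimal value.
        assert(std::strtoull("10", &end, 0) == 10);

        /// strtod is not affected by the leading zero: "010" is still 10.0.
        assert(std::strtod("010", &end) == 10.0);
        return 0;
    }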
@ -448,6 +448,7 @@ PODArray<char> & compress(PODArray<char> & source, PODArray<char> & scratch, Com
         std::move(dest_buf),
         method,
         /*level*/ 3,
+        /*zstd_window_log*/ 0,
         source.size(),
         /*existing_memory*/ source.data());
     chassert(compressed_buf->position() == source.data());
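In the helper above the new argument is passed as 0. In the zstd API a window log of 0 is the documented "use the default" sentinel, so this call presumably keeps the codec's previous behaviour; the sketch below assumes ClickHouse's write-buffer wrapper forwards the value with the same convention (illustrative only, not the wrapper's actual code):

    #include <zstd.h>

    /// 0 is zstd's "use the default" sentinel for the window log, so passing
    /// /*zstd_window_log*/ 0 leaves the window size up to the chosen level.
    void setWindowLog(ZSTD_CCtx * cctx, int window_log)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, window_log);   /// 0 -> default
    }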
Some files were not shown because too many files have changed in this diff.