From 10e0219250837427d8302132f892f722ea37b3a4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 23 Oct 2024 21:45:39 +0200 Subject: [PATCH] Remove idxd-config library which has incompatible license --- .gitmodules | 6 - contrib/CMakeLists.txt | 24 +- contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt | 126 ++- contrib/idxd-config | 1 - contrib/idxd-config-cmake/CMakeLists.txt | 23 - contrib/idxd-config-cmake/include/config.h | 159 ---- contrib/qpl | 1 - contrib/qpl-cmake/CMakeLists.txt | 738 ------------------ contrib/qpl-cmake/uuid/uuid.h | 4 - .../building_and_benchmarking_deflate_qpl.md | 327 -------- programs/compressor/Compressor.cpp | 6 +- src/CMakeLists.txt | 15 +- src/Client/Connection.cpp | 2 - src/Common/config.h.in | 1 - src/Compression/CompressedReadBufferBase.cpp | 12 - src/Compression/CompressedReadBufferBase.h | 8 - .../CompressedReadBufferFromFile.cpp | 33 +- .../CompressionCodecDeflateQpl.cpp | 490 ------------ src/Compression/CompressionCodecDeflateQpl.h | 125 --- src/Compression/CompressionFactory.cpp | 6 - src/Compression/CompressionFactory.h | 4 +- .../CompressionFactoryAdditions.cpp | 14 +- src/Compression/CompressionInfo.h | 1 - src/Compression/ICompressionCodec.h | 32 - src/Core/Settings.cpp | 7 +- .../enableAllExperimentalSettings.cpp | 1 - src/Interpreters/InterpreterCreateQuery.cpp | 4 +- src/Server/TCPHandler.cpp | 2 - src/Storages/AlterCommands.cpp | 10 +- src/Storages/ColumnsDescription.cpp | 2 +- src/Storages/Distributed/DistributedSink.cpp | 2 - src/Storages/TTLDescription.cpp | 3 +- src/configure_config.cmake | 3 - tests/ci/stress.py | 1 - .../deflateqpl_compression_by_default.xml | 11 - .../configs/enable_deflateqpl_codec.xml | 7 - .../test_non_default_compression/test.py | 73 -- ...st_deflate_qpl_codec_compression.reference | 6 - ...804_test_deflate_qpl_codec_compression.sql | 49 -- 39 files changed, 64 insertions(+), 2275 deletions(-) delete mode 160000 contrib/idxd-config delete mode 100644 contrib/idxd-config-cmake/CMakeLists.txt delete mode 100644 contrib/idxd-config-cmake/include/config.h delete mode 160000 contrib/qpl delete mode 100644 contrib/qpl-cmake/CMakeLists.txt delete mode 100644 contrib/qpl-cmake/uuid/uuid.h delete mode 100644 docs/en/development/building_and_benchmarking_deflate_qpl.md delete mode 100644 src/Compression/CompressionCodecDeflateQpl.cpp delete mode 100644 src/Compression/CompressionCodecDeflateQpl.h delete mode 100644 tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml delete mode 100644 tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml delete mode 100644 tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference delete mode 100644 tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql diff --git a/.gitmodules b/.gitmodules index bd61c52a5e0..bbc8fc7d06c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -227,12 +227,6 @@ [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng -[submodule "contrib/qpl"] - path = contrib/qpl - url = https://github.com/intel/qpl -[submodule "contrib/idxd-config"] - path = contrib/idxd-config - url = https://github.com/intel/idxd-config [submodule "contrib/QAT-ZSTD-Plugin"] path = contrib/QAT-ZSTD-Plugin url = https://github.com/intel/QAT-ZSTD-Plugin diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b102b2919d9..fa0f95245f2 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -178,35 +178,13 @@ add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) -if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42) - option (ENABLE_QPL "Enable Intel® Query Processing Library (QPL)" ${ENABLE_LIBRARIES}) -elseif(ENABLE_QPL) - message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 with SSE 4.2 or higher") -endif() -if (ENABLE_QPL) - add_contrib (idxd-config-cmake idxd-config) - add_contrib (qpl-cmake qpl) # requires: idxd-config -else() - message(STATUS "Not using QPL") -endif () - if (OS_LINUX AND ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES}) elseif(ENABLE_QATLIB) message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64") endif() if (ENABLE_QATLIB) - option (ENABLE_QAT_USDM_DRIVER "A User Space DMA-able Memory (USDM) component which allocates/frees DMA-able memory" OFF) - option (ENABLE_QAT_OUT_OF_TREE_BUILD "Using out-of-tree driver, user needs to customize ICP_ROOT variable" OFF) - set(ICP_ROOT "" CACHE STRING "ICP_ROOT variable to define the path of out-of-tree driver package") - if (ENABLE_QAT_OUT_OF_TREE_BUILD) - if (ICP_ROOT STREQUAL "") - message(FATAL_ERROR "Please define the path of out-of-tree driver package with -DICP_ROOT=xxx or disable out-of-tree build with -DENABLE_QAT_OUT_OF_TREE_BUILD=OFF; \ - If you want out-of-tree build but have no package available, please download and build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html") - endif () - else() - add_contrib (qatlib-cmake qatlib) # requires: isa-l - endif () + add_contrib (qatlib-cmake qatlib) # requires: isa-l add_contrib (QAT-ZSTD-Plugin-cmake QAT-ZSTD-Plugin) else() message(STATUS "Not using QATLib") diff --git a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt index fc18092f574..5d1cfa2af14 100644 --- a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt +++ b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt @@ -1,85 +1,53 @@ # Intel® QuickAssist Technology ZSTD Plugin (QAT ZSTD Plugin) is a plugin to Zstandard*(ZSTD*) for accelerating compression by QAT. -# ENABLE_QAT_OUT_OF_TREE_BUILD = 1 means kernel don't have native support, user will build and install driver from external package: https://www.intel.com/content/www/us/en/download/765501.html -# meanwhile, user need to set ICP_ROOT environment variable which point to the root directory of QAT driver source tree. -# ENABLE_QAT_OUT_OF_TREE_BUILD = 0 means kernel has built-in qat driver, QAT-ZSTD-PLUGIN just has dependency on qatlib. -if (ENABLE_QAT_OUT_OF_TREE_BUILD) - message(STATUS "Intel QATZSTD out-of-tree build, ICP_ROOT:${ICP_ROOT}") +message(STATUS "Intel QATZSTD in-tree build") +set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") +set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") +set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") - set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") - set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") - set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") - set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") - set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") - set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") - set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") - set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") - set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") - if (ENABLE_QAT_USDM_DRIVER) - add_definitions(-DENABLE_USDM_DRV) - endif() - add_library(_qatzstd_plugin ${QATZSTD_SRC}) - target_link_libraries (_qatzstd_plugin PUBLIC ${USDM_LIBRARY} ${QAT_S_LIBRARY}) - target_include_directories(_qatzstd_plugin - SYSTEM PUBLIC "${QATZSTD_SRC_DIR}" - PRIVATE ${QAT_INCLUDE_DIR} - ${QAT_DC_INCLUDE_DIR} - ${QAT_AL_INCLUDE_DIR} - ${QAT_USDM_INCLUDE_DIR} - ${ZSTD_LIBRARY_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0) - add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) -else () # In-tree build - message(STATUS "Intel QATZSTD in-tree build") - set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") - set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") - set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") +# please download&build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html +set(ICP_ROOT "${ClickHouse_SOURCE_DIR}/contrib/qatlib") +set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") +set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") +set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") +set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") +set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") +set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") +set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib") +set(LIBQAT_HEADER_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") - # please download&build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html - set(ICP_ROOT "${ClickHouse_SOURCE_DIR}/contrib/qatlib") - set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") - set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") - set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") - set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") - set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") - set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") - set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib") - set(LIBQAT_HEADER_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") +file(MAKE_DIRECTORY + "${LIBQAT_HEADER_DIR}/qat" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/cpa.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/dc/cpa_dc.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_poll.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_user.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv/qae_mem.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) - file(MAKE_DIRECTORY - "${LIBQAT_HEADER_DIR}/qat" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/cpa.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/dc/cpa_dc.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_poll.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_user.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv/qae_mem.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - - if (ENABLE_QAT_USDM_DRIVER) - add_definitions(-DENABLE_USDM_DRV) - endif() - - add_library(_qatzstd_plugin ${QATZSTD_SRC}) - target_link_libraries (_qatzstd_plugin PUBLIC ch_contrib::qatlib ch_contrib::usdm) - target_include_directories(_qatzstd_plugin PRIVATE - ${QAT_INCLUDE_DIR} - ${QAT_DC_INCLUDE_DIR} - ${QAT_AL_INCLUDE_DIR} - ${QAT_USDM_INCLUDE_DIR} - ${ZSTD_LIBRARY_DIR} - ${LIBQAT_HEADER_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE) - target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) - add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) -endif () +if (ENABLE_QAT_USDM_DRIVER) + add_definitions(-DENABLE_USDM_DRV) +endif() +add_library(_qatzstd_plugin ${QATZSTD_SRC}) +target_link_libraries (_qatzstd_plugin PUBLIC ch_contrib::qatlib ch_contrib::usdm) +target_include_directories(_qatzstd_plugin PRIVATE + ${QAT_INCLUDE_DIR} + ${QAT_DC_INCLUDE_DIR} + ${QAT_AL_INCLUDE_DIR} + ${QAT_USDM_INCLUDE_DIR} + ${ZSTD_LIBRARY_DIR} + ${LIBQAT_HEADER_DIR}) +target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE) +target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) +add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) diff --git a/contrib/idxd-config b/contrib/idxd-config deleted file mode 160000 index a836ce0e420..00000000000 --- a/contrib/idxd-config +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1 diff --git a/contrib/idxd-config-cmake/CMakeLists.txt b/contrib/idxd-config-cmake/CMakeLists.txt deleted file mode 100644 index 030252ec8e6..00000000000 --- a/contrib/idxd-config-cmake/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -## accel_config is the utility library required by QPL-Deflate codec for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA). -set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config") -set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") -set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config-cmake/include") -set (SRCS - "${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c" - "${LIBACCEL_SOURCE_DIR}/util/log.c" - "${LIBACCEL_SOURCE_DIR}/util/sysfs.c" -) - -add_library(_accel-config ${SRCS}) - -target_compile_options(_accel-config PRIVATE "-D_GNU_SOURCE") - -target_include_directories(_accel-config BEFORE - PRIVATE ${UUID_DIR} - PRIVATE ${LIBACCEL_HEADER_DIR} - PRIVATE ${LIBACCEL_SOURCE_DIR}) - -target_include_directories(_accel-config SYSTEM BEFORE - PUBLIC ${LIBACCEL_SOURCE_DIR}/accfg) - -add_library(ch_contrib::accel-config ALIAS _accel-config) diff --git a/contrib/idxd-config-cmake/include/config.h b/contrib/idxd-config-cmake/include/config.h deleted file mode 100644 index f03b0eac0b0..00000000000 --- a/contrib/idxd-config-cmake/include/config.h +++ /dev/null @@ -1,159 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Define if building universal (internal helper macro) */ -/* #undef AC_APPLE_UNIVERSAL_BUILD */ - -/* Debug messages. */ -/* #undef ENABLE_DEBUG */ - -/* Documentation / man pages. */ -/* #define ENABLE_DOCS */ - -/* System logging. */ -#define ENABLE_LOGGING 1 - -/* accfg test support */ -/* #undef ENABLE_TEST */ - -/* Define to 1 if big-endian-arch */ -/* #undef HAVE_BIG_ENDIAN */ - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LINUX_VERSION_H 1 - -/* Define to 1 if little-endian-arch */ -#define HAVE_LITTLE_ENDIAN 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `secure_getenv' function. */ -#define HAVE_SECURE_GETENV 1 - -/* Define to 1 if you have statement expressions. */ -#define HAVE_STATEMENT_EXPR 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if typeof works with your compiler. */ -#define HAVE_TYPEOF 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if using libuuid */ -#define HAVE_UUID 1 - -/* Define to 1 if you have the `__secure_getenv' function. */ -/* #undef HAVE___SECURE_GETENV */ - -/* Define to the sub-directory where libtool stores uninstalled libraries. */ -#define LT_OBJDIR ".libs/" - -/* Name of package */ -#define PACKAGE "accel-config" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "linux-dsa@lists.01.org" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "accel-config" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "accel-config 3.5.2.gitf6605c41" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "accel-config" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "https://github.com/xxx/accel-config" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "3.5.2.gitf6605c41" - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Enable extensions on AIX 3, Interix. */ -#ifndef _ALL_SOURCE -# define _ALL_SOURCE 1 -#endif -/* Enable GNU extensions on systems that have them. */ -#ifndef _GNU_SOURCE -# define _GNU_SOURCE 1 -#endif -/* Enable threading extensions on Solaris. */ -#ifndef _POSIX_PTHREAD_SEMANTICS -# define _POSIX_PTHREAD_SEMANTICS 1 -#endif -/* Enable extensions on HP NonStop. */ -#ifndef _TANDEM_SOURCE -# define _TANDEM_SOURCE 1 -#endif -/* Enable general extensions on Solaris. */ -#ifndef __EXTENSIONS__ -# define __EXTENSIONS__ 1 -#endif - - -/* Version number of package */ -#define VERSION "3.5.2.gitf6605c41" - -/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most - significant byte first (like Motorola and SPARC, unlike Intel). */ -#if defined AC_APPLE_UNIVERSAL_BUILD -# if defined __BIG_ENDIAN__ -# define WORDS_BIGENDIAN 1 -# endif -#else -# ifndef WORDS_BIGENDIAN -/* # undef WORDS_BIGENDIAN */ -# endif -#endif - -/* Enable large inode numbers on Mac OS X 10.5. */ -#ifndef _DARWIN_USE_64_BIT_INODE -# define _DARWIN_USE_64_BIT_INODE 1 -#endif - -/* Number of bits in a file offset, on hosts where this is settable. */ -/* #undef _FILE_OFFSET_BITS */ - -/* Define for large files, on AIX-style hosts. */ -/* #undef _LARGE_FILES */ - -/* Define to 1 if on MINIX. */ -/* #undef _MINIX */ - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. */ -/* #undef _POSIX_1_SOURCE */ - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -/* #undef _POSIX_SOURCE */ - -/* Define to __typeof__ if your compiler spells it that way. */ -/* #undef typeof */ diff --git a/contrib/qpl b/contrib/qpl deleted file mode 160000 index c2ced94c53c..00000000000 --- a/contrib/qpl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c2ced94c53c1ee22191201a59878e9280bc9b9b8 diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt deleted file mode 100644 index 89332ae0f7a..00000000000 --- a/contrib/qpl-cmake/CMakeLists.txt +++ /dev/null @@ -1,738 +0,0 @@ -## The Intel® QPL provides high performance implementations of data processing functions for existing hardware accelerator, and/or software path in case if hardware accelerator is not available. -set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") -set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl") -set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources") -set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl") -set (EFFICIENT_WAIT OFF) -set (LOG_HW_INIT OFF) -set (SANITIZE_MEMORY OFF) -set (SANITIZE_THREADS OFF) -set (LIB_FUZZING_ENGINE OFF) -set (DYNAMIC_LOADING_LIBACCEL_CONFIG OFF) - -function(GetLibraryVersion _content _outputVar) - string(REGEX MATCHALL "QPL VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") - SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE) -endfunction() - -set (QPL_VERSION 1.6.0) - -message(STATUS "Intel QPL version: ${QPL_VERSION}") - -# There are 5 source subdirectories under $QPL_SRC_DIR: c_api, core-iaa, core-sw, middle-layer and isal. -# Generate 8 library targets: qpl_c_api, core_iaa, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, middle_layer_lib, isal and isal_asm, -# which are then combined into static or shared qpl. -# Output ch_contrib::qpl by linking with 8 library targets. - -# Note, QPL has integrated a customized version of ISA-L to meet specific needs. -# This version has been significantly modified and there are no plans to maintain compatibility with the upstream version -# or upgrade the current copy. - -## cmake/CompileOptions.cmake and automatic wrappers generation - -# ========================================================================== -# Copyright (C) 2022 Intel Corporation -# -# SPDX-License-Identifier: MIT -# ========================================================================== - -set(QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS "-fno-exceptions;-fno-rtti") - -function(modify_standard_language_flag) - # Declaring function parameters - set(OPTIONS "") - set(ONE_VALUE_ARGS - LANGUAGE_NAME - FLAG_NAME - NEW_FLAG_VALUE) - set(MULTI_VALUE_ARGS "") - - # Parsing function parameters - cmake_parse_arguments(MODIFY - "${OPTIONS}" - "${ONE_VALUE_ARGS}" - "${MULTI_VALUE_ARGS}" - ${ARGN}) - - # Variables - set(FLAG_REGULAR_EXPRESSION "${MODIFY_FLAG_NAME}.*[ ]*") - set(NEW_VALUE "${MODIFY_FLAG_NAME}${MODIFY_NEW_FLAG_VALUE}") - - # Replacing specified flag with new value - string(REGEX REPLACE - ${FLAG_REGULAR_EXPRESSION} ${NEW_VALUE} - NEW_COMPILE_FLAGS - "${CMAKE_${MODIFY_LANGUAGE_NAME}_FLAGS}") - - # Returning the value - set(CMAKE_${MODIFY_LANGUAGE_NAME}_FLAGS ${NEW_COMPILE_FLAGS} PARENT_SCOPE) -endfunction() - -function(get_function_name_with_default_bit_width in_function_name bit_width out_function_name) - - if(in_function_name MATCHES ".*_i") - - string(REPLACE "_i" "" in_function_name ${in_function_name}) - - set(${out_function_name} "${in_function_name}_${bit_width}_i" PARENT_SCOPE) - - else() - - set(${out_function_name} "${in_function_name}_${bit_width}" PARENT_SCOPE) - - endif() - -endfunction() - -macro(get_list_of_supported_optimizations PLATFORMS_LIST) - list(APPEND PLATFORMS_LIST "") - list(APPEND PLATFORMS_LIST "px") - list(APPEND PLATFORMS_LIST "avx512") -endmacro(get_list_of_supported_optimizations) - -function(generate_unpack_kernel_arrays current_directory PLATFORMS_LIST) - list(APPEND UNPACK_POSTFIX_LIST "") - list(APPEND UNPACK_PRLE_POSTFIX_LIST "") - list(APPEND PACK_POSTFIX_LIST "") - list(APPEND PACK_INDEX_POSTFIX_LIST "") - list(APPEND SCAN_POSTFIX_LIST "") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "") - list(APPEND DEFAULT_BIT_WIDTH_LIST "") - - #create list of functions that use only 8u 16u 32u postfixes - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "unpack_prle") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "extract") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "extract_i") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "select") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "select_i") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "expand") - - #create default bit width list - list(APPEND DEFAULT_BIT_WIDTH_LIST "8u") - list(APPEND DEFAULT_BIT_WIDTH_LIST "16u") - list(APPEND DEFAULT_BIT_WIDTH_LIST "32u") - - #create scan kernel postfixes - list(APPEND SCAN_COMPARATOR_LIST "") - - list(APPEND SCAN_COMPARATOR_LIST "eq") - list(APPEND SCAN_COMPARATOR_LIST "ne") - list(APPEND SCAN_COMPARATOR_LIST "lt") - list(APPEND SCAN_COMPARATOR_LIST "le") - list(APPEND SCAN_COMPARATOR_LIST "gt") - list(APPEND SCAN_COMPARATOR_LIST "ge") - list(APPEND SCAN_COMPARATOR_LIST "range") - list(APPEND SCAN_COMPARATOR_LIST "not_range") - - foreach(SCAN_COMPARATOR IN LISTS SCAN_COMPARATOR_LIST) - list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_8u") - list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_16u8u") - list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_32u8u") - endforeach() - - # create unpack kernel postfixes - foreach(input_width RANGE 1 32 1) - if(input_width LESS 8 OR input_width EQUAL 8) - list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u8u") - - elseif(input_width LESS 16 OR input_width EQUAL 16) - list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u16u") - - else() - list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u32u") - endif() - endforeach() - - # create pack kernel postfixes - foreach(output_width RANGE 1 8 1) - list(APPEND PACK_POSTFIX_LIST "_8u${output_width}u") - endforeach() - - foreach(output_width RANGE 9 16 1) - list(APPEND PACK_POSTFIX_LIST "_16u${output_width}u") - endforeach() - - foreach(output_width RANGE 17 32 1) - list(APPEND PACK_POSTFIX_LIST "_32u${output_width}u") - endforeach() - - list(APPEND PACK_POSTFIX_LIST "_8u16u") - list(APPEND PACK_POSTFIX_LIST "_8u32u") - list(APPEND PACK_POSTFIX_LIST "_16u32u") - - # create pack index kernel postfixes - list(APPEND PACK_INDEX_POSTFIX_LIST "_nu") - list(APPEND PACK_INDEX_POSTFIX_LIST "_8u") - list(APPEND PACK_INDEX_POSTFIX_LIST "_8u16u") - list(APPEND PACK_INDEX_POSTFIX_LIST "_8u32u") - - # write to file - file(MAKE_DIRECTORY ${current_directory}/generated) - - foreach(PLATFORM_VALUE IN LISTS PLATFORMS_LIST) - set(directory "${current_directory}/generated") - set(PLATFORM_PREFIX "${PLATFORM_VALUE}_") - - # - # Write unpack table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}unpack.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "unpack_table_t ${PLATFORM_PREFIX}unpack_table = {\n") - - #write LE kernels - foreach(UNPACK_POSTFIX IN LISTS UNPACK_POSTFIX_LIST) - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack${UNPACK_POSTFIX},\n") - endforeach() - - #write BE kernels - - #get last element of the list - set(LAST_ELEMENT "") - list(GET UNPACK_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(UNPACK_POSTFIX IN LISTS UNPACK_POSTFIX_LIST) - - if(UNPACK_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack_be${UNPACK_POSTFIX}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack_be${UNPACK_POSTFIX},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "}\n") - - # - # Write pack table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}pack.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "pack_table_t ${PLATFORM_PREFIX}pack_table = {\n") - - #write LE kernels - foreach(PACK_POSTFIX IN LISTS PACK_POSTFIX_LIST) - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack${PACK_POSTFIX},\n") - endforeach() - - #write BE kernels - - #get last element of the list - set(LAST_ELEMENT "") - list(GET PACK_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(PACK_POSTFIX IN LISTS PACK_POSTFIX_LIST) - - if(PACK_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack_be${PACK_POSTFIX}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack_be${PACK_POSTFIX},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "}\n") - - # - # Write scan table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}scan.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "scan_table_t ${PLATFORM_PREFIX}scan_table = {\n") - - #get last element of the list - set(LAST_ELEMENT "") - list(GET SCAN_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(SCAN_POSTFIX IN LISTS SCAN_POSTFIX_LIST) - - if(SCAN_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "}\n") - - # - # Write scan_i table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}scan_i.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "scan_i_table_t ${PLATFORM_PREFIX}scan_i_table = {\n") - - #get last element of the list - set(LAST_ELEMENT "") - list(GET SCAN_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(SCAN_POSTFIX IN LISTS SCAN_POSTFIX_LIST) - - if(SCAN_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}_i};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}_i,\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "}\n") - - # - # Write pack_index table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}pack_index.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "pack_index_table_t ${PLATFORM_PREFIX}pack_index_table = {\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_bits_nu,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u32u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_bits_be_nu,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_be_8u16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_be_8u32u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "}\n") - - # - # Write default bit width functions - # - foreach(DEAULT_BIT_WIDTH_FUNCTION IN LISTS DEFAULT_BIT_WIDTH_FUNCTIONS_LIST) - file(WRITE ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "${DEAULT_BIT_WIDTH_FUNCTION}_table_t ${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}_table = {\n") - - #get last element of the list - set(LAST_ELEMENT "") - list(GET DEFAULT_BIT_WIDTH_LIST -1 LAST_ELEMENT) - - foreach(BIT_WIDTH IN LISTS DEFAULT_BIT_WIDTH_LIST) - - set(FUNCTION_NAME "") - get_function_name_with_default_bit_width(${DEAULT_BIT_WIDTH_FUNCTION} ${BIT_WIDTH} FUNCTION_NAME) - - if(BIT_WIDTH STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "\t${PLATFORM_PREFIX}qplc_${FUNCTION_NAME}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "\t${PLATFORM_PREFIX}qplc_${FUNCTION_NAME},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "}\n") - endforeach() - - # - # Write aggregates table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}aggregates.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "aggregates_table_t ${PLATFORM_PREFIX}aggregates_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_bit_aggregates_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_32u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "}\n") - - # - # Write mem_copy functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "memory_copy_table_t ${PLATFORM_PREFIX}memory_copy_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_32u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "}\n") - - # - # Write mem_copy functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}zero.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "zero_table_t ${PLATFORM_PREFIX}zero_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "\t${PLATFORM_PREFIX}qplc_zero_8u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "}\n") - - # - # Write move functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}move.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "move_table_t ${PLATFORM_PREFIX}move_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "\t${PLATFORM_PREFIX}qplc_move_8u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "}\n") - - # - # Write crc64 function table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}crc64.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "crc64_table_t ${PLATFORM_PREFIX}crc64_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "\t${PLATFORM_PREFIX}qplc_crc64};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "}\n") - - # - # Write xor_checksum function table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "xor_checksum_table_t ${PLATFORM_PREFIX}xor_checksum_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "\t${PLATFORM_PREFIX}qplc_xor_checksum_8u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "}\n") - - # - # Write deflate functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_slow_icf.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_hash_table.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_histogram.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "deflate_table_t ${PLATFORM_PREFIX}deflate_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}slow_deflate_icf_body),\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}deflate_histogram_reset),\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}deflate_hash_table_reset)};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "}\n") - - # - # Write deflate fix functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "#include \"deflate_slow.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "deflate_fix_table_t ${PLATFORM_PREFIX}deflate_fix_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}slow_deflate_body)};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "}\n") - - # - # Write setup_dictionary functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "#include \"deflate_slow_utils.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "setup_dictionary_table_t ${PLATFORM_PREFIX}setup_dictionary_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}setup_dictionary)};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "}\n") - - endforeach() -endfunction() - -# [SUBDIR]isal - -enable_language(ASM_NASM) - -set(ISAL_C_SRC ${QPL_SRC_DIR}/isal/igzip/adler32_base.c - ${QPL_SRC_DIR}/isal/igzip/huff_codes.c - ${QPL_SRC_DIR}/isal/igzip/hufftables_c.c - ${QPL_SRC_DIR}/isal/igzip/igzip.c - ${QPL_SRC_DIR}/isal/igzip/igzip_base.c - ${QPL_SRC_DIR}/isal/igzip/flatten_ll.c - ${QPL_SRC_DIR}/isal/igzip/encode_df.c - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_base.c - ${QPL_SRC_DIR}/isal/igzip/igzip_inflate.c - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_body.c - ${QPL_SRC_DIR}/isal/crc/crc_base.c - ${QPL_SRC_DIR}/isal/crc/crc64_base.c) - -set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_04.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_06.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_04.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_finish.asm - ${QPL_SRC_DIR}/isal/igzip/encode_df_04.asm - ${QPL_SRC_DIR}/isal/igzip/encode_df_06.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_01.asm - ${QPL_SRC_DIR}/isal/igzip/proc_heap.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_body_h1_gr_bt.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_finish.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_inflate_multibinary.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_01.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_04.asm - ${QPL_SRC_DIR}/isal/igzip/rfc1951_lookup.asm - ${QPL_SRC_DIR}/isal/igzip/adler32_sse.asm - ${QPL_SRC_DIR}/isal/igzip/adler32_avx2_4.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_deflate_hash.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_04.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_06.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_multibinary.asm - ${QPL_SRC_DIR}/isal/crc/crc_multibinary.asm - ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8.asm - ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8_02.asm - ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by16_10.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_01.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_02.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_by4.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_by16_10.asm - ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_00.asm - ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_01.asm - ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_by16_10.asm) - -# Adding ISA-L library target -add_library(isal OBJECT ${ISAL_C_SRC}) -add_library(isal_asm OBJECT ${ISAL_ASM_SRC}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -# Setting external and internal interfaces for ISA-L library -target_include_directories(isal - PUBLIC $ - PUBLIC ${QPL_SRC_DIR}/isal/igzip) - -set_target_properties(isal PROPERTIES - CXX_STANDARD 11 - C_STANDARD 99) - -# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available". -# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system. -target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/" - PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/" - PRIVATE "-I${QPL_SRC_DIR}/isal/crc/" - PRIVATE "-DHAVE_AS_KNOWS_AVX512" - PRIVATE "-DAS_FEATURE_LEVEL=10" - PRIVATE "-DQPL_LIB") - -# Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. -# Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" -if (SANITIZE STREQUAL "undefined") - get_target_property(target_options isal_asm COMPILE_OPTIONS) - list(REMOVE_ITEM target_options "-fno-sanitize=undefined") - set_property(TARGET isal_asm PROPERTY COMPILE_OPTIONS ${target_options}) -endif() - -target_compile_definitions(isal PUBLIC - QPL_LIB - NDEBUG) - -# [SUBDIR]core-sw -# Create set of libraries corresponding to supported platforms for SW fallback which are implemented by AVX512 and non-AVX512 instructions respectively. -# The upper level QPL API will check SIMD capabilities of the target system at runtime and decide to call AVX512 function or non-AVX512 function. -# Hence, here we don't need put ENABLE_AVX512 CMake switch. - -get_list_of_supported_optimizations(PLATFORMS_LIST) - -foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) - # Find Core Sources - file(GLOB SOURCES - ${QPL_SRC_DIR}/core-sw/src/checksums/*.c - ${QPL_SRC_DIR}/core-sw/src/filtering/*.c - ${QPL_SRC_DIR}/core-sw/src/other/*.c - ${QPL_SRC_DIR}/core-sw/src/compression/*.c) - - file(GLOB DATA_SOURCES - ${QPL_SRC_DIR}/core-sw/src/data/*.c) - - # Create library - add_library(qplcore_${PLATFORM_ID} OBJECT ${SOURCES}) - - set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - - target_include_directories(qplcore_${PLATFORM_ID} - PUBLIC $ - PUBLIC $ - PUBLIC $ - PUBLIC $ - PRIVATE $) - - # Set specific compiler options and/or definitions based on a platform - if (${PLATFORM_ID} MATCHES "avx512") - target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=2) - target_compile_options(qplcore_${PLATFORM_ID} PRIVATE -march=skylake-avx512) - else() # Create default px library - target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=0) - endif() - - target_link_libraries(qplcore_${PLATFORM_ID} isal) -endforeach() - -# -# Create dispatcher between platforms and auto-generated wrappers -# -file(GLOB SW_DISPATCHER_SOURCES ${QPL_SRC_DIR}/core-sw/dispatcher/*.cpp) - -add_library(qplcore_sw_dispatcher OBJECT ${SW_DISPATCHER_SOURCES}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -target_include_directories(qplcore_sw_dispatcher - PUBLIC $) - -# Generate kernel wrappers -generate_unpack_kernel_arrays(${QPL_BINARY_DIR} "${PLATFORMS_LIST}") - -foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) - file(GLOB GENERATED_${PLATFORM_ID}_TABLES_SRC ${QPL_BINARY_DIR}/generated/${PLATFORM_ID}_*.cpp) - - target_sources(qplcore_sw_dispatcher PRIVATE ${GENERATED_${PLATFORM_ID}_TABLES_SRC}) - - # Set specific compiler options and/or definitions based on a platform - if (${PLATFORM_ID} MATCHES "avx512") - set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2) - else() - set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0) - endif() - - target_include_directories(qplcore_sw_dispatcher - PUBLIC $) -endforeach() - -set_target_properties(qplcore_sw_dispatcher PROPERTIES CXX_STANDARD 17) - -# w/a for build compatibility with ISAL codebase -target_compile_definitions(qplcore_sw_dispatcher PUBLIC -DQPL_LIB) - -target_compile_options(qplcore_sw_dispatcher - PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}) - -# [SUBDIR]core-iaa -file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c - ${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.c - ${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.c - ${QPL_SRC_DIR}/core-iaa/sources/*.c) - -# Create library -add_library(core_iaa OBJECT ${HW_PATH_SRC}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -target_include_directories(core_iaa - PRIVATE ${UUID_DIR} - PUBLIC $ - PUBLIC $ - PRIVATE $ # status.h in own_checkers.h - PRIVATE $ # for own_checkers.h - PRIVATE $) - -target_compile_features(core_iaa PRIVATE c_std_11) - -target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK - PRIVATE $<$:LOG_HW_INIT> - PRIVATE $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>) - -# [SUBDIR]middle-layer -file(GLOB MIDDLE_LAYER_SRC - ${QPL_SRC_DIR}/middle-layer/accelerator/*.cpp - ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp - ${QPL_SRC_DIR}/middle-layer/common/*.cpp - ${QPL_SRC_DIR}/middle-layer/compression/*.cpp - ${QPL_SRC_DIR}/middle-layer/compression/*/*.cpp - ${QPL_SRC_DIR}/middle-layer/compression/*/*/*.cpp - ${QPL_SRC_DIR}/middle-layer/dispatcher/*.cpp - ${QPL_SRC_DIR}/middle-layer/other/*.cpp - ${QPL_SRC_DIR}/middle-layer/util/*.cpp) - -add_library(middle_layer_lib OBJECT - ${MIDDLE_LAYER_SRC}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -target_compile_options(middle_layer_lib - PRIVATE $<$:$<$:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>> - PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}) - -target_compile_definitions(middle_layer_lib - PUBLIC QPL_VERSION="${QPL_VERSION}" - PUBLIC $<$:LOG_HW_INIT> - PUBLIC $<$:QPL_EFFICIENT_WAIT> - PUBLIC QPL_BADARG_CHECK - PUBLIC $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>) - -set_target_properties(middle_layer_lib PROPERTIES CXX_STANDARD 17) - -target_include_directories(middle_layer_lib - PRIVATE ${UUID_DIR} - PUBLIC $ - PUBLIC $ - PRIVATE $ - PUBLIC $ - PUBLIC $ - PUBLIC $) - -target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB) - -# [SUBDIR]c_api -file(GLOB QPL_C_API_SRC - ${QPL_SRC_DIR}/c_api/compression_operations/*.c - ${QPL_SRC_DIR}/c_api/compression_operations/*.cpp - ${QPL_SRC_DIR}/c_api/filter_operations/*.cpp - ${QPL_SRC_DIR}/c_api/legacy_hw_path/*.c - ${QPL_SRC_DIR}/c_api/legacy_hw_path/*.cpp - ${QPL_SRC_DIR}/c_api/other_operations/*.cpp - ${QPL_SRC_DIR}/c_api/serialization/*.cpp - ${QPL_SRC_DIR}/c_api/*.cpp) - -add_library(qpl_c_api OBJECT ${QPL_C_API_SRC}) - -target_include_directories(qpl_c_api - PUBLIC $ - PUBLIC $ $ - PRIVATE $) - -set_target_properties(qpl_c_api PROPERTIES - $<$:C_STANDARD 17 - CXX_STANDARD 17) - -target_compile_options(qpl_c_api - PRIVATE $<$:$<$:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>> - PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>) - -target_compile_definitions(qpl_c_api - PUBLIC -DQPL_BADARG_CHECK # own_checkers.h - PUBLIC -DQPL_LIB # needed for middle_layer_lib - PUBLIC $<$:LOG_HW_INIT>) # needed for middle_layer_lib - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -# Final _qpl target - -get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS) - -add_library(_qpl STATIC ${LIB_DEPS}) - -target_include_directories(_qpl - PUBLIC $ $) - -target_link_libraries(_qpl - PRIVATE ch_contrib::accel-config) - -target_include_directories(_qpl SYSTEM BEFORE - PUBLIC "${QPL_PROJECT_DIR}/include" - PUBLIC ${UUID_DIR}) - -add_library (ch_contrib::qpl ALIAS _qpl) diff --git a/contrib/qpl-cmake/uuid/uuid.h b/contrib/qpl-cmake/uuid/uuid.h deleted file mode 100644 index bf108ba0d29..00000000000 --- a/contrib/qpl-cmake/uuid/uuid.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _QPL_UUID_UUID_H -#define _QPL_UUID_UUID_H -typedef unsigned char uuid_t[16]; -#endif /* _QPL_UUID_UUID_H */ diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md deleted file mode 100644 index b9d39b8cc2d..00000000000 --- a/docs/en/development/building_and_benchmarking_deflate_qpl.md +++ /dev/null @@ -1,327 +0,0 @@ ---- -slug: /en/development/building_and_benchmarking_deflate_qpl -sidebar_position: 73 -sidebar_label: Building and Benchmarking DEFLATE_QPL -description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec ---- - -# Build Clickhouse with DEFLATE_QPL - -- Make sure your host machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) -- deflate_qpl is enabled by default during cmake build. In case you accidentally change it, please double-check build flag: ENABLE_QPL=1 - -- For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md) - -# Run Benchmark with DEFLATE_QPL - -## Files list - -The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) give example to run benchmark with python scripts: - -`client_scripts` contains python scripts for running typical benchmark, for example: -- `client_stressing_test.py`: The python script for query stress test with [1~4] server instances. -- `queries_ssb.sql`: The file lists all queries for [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema/) -- `allin1_ssb.sh`: This shell script executes benchmark workflow all in one automatically. - -`database_files` means it will store database files according to lz4/deflate/zstd codec. - -## Run benchmark automatically for Star Schema: - -``` bash -$ cd ./benchmark_sample/client_scripts -$ sh run_ssb.sh -``` - -After complete, please check all the results in this folder:`./output/` - -In case you run into failure, please manually run benchmark as below sections. - -## Definition - -[CLICKHOUSE_EXE] means the path of clickhouse executable program. - -## Environment - -- CPU: Sapphire Rapid -- OS Requirements refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements) -- IAA Setup refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) -- Install python modules: - -``` bash -pip3 install clickhouse_driver numpy -``` - -[Self-check for IAA] - -``` bash -$ accel-config list | grep -P 'iax|state' -``` - -Expected output like this: -``` bash - "dev":"iax1", - "state":"enabled", - "state":"enabled", -``` - -If you see nothing output, it means IAA is not ready to work. Please check IAA setup again. - -## Generate raw data - -``` bash -$ cd ./benchmark_sample -$ mkdir rawdata_dir && cd rawdata_dir -``` - -Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows data with the parameters: --s 20 - -The files like `*.tbl` are expected to output under `./benchmark_sample/rawdata_dir/ssb-dbgen`: - -## Database setup - -Set up database with LZ4 codec - -``` bash -$ cd ./database_dir/lz4 -$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -$ [CLICKHOUSE_EXE] client -``` - -Here you should see the message `Connected to ClickHouse server` from console which means client successfully setup connection with server. - -Complete below three steps mentioned in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) -- Creating tables in ClickHouse -- Inserting data. Here should use `./benchmark_sample/rawdata_dir/ssb-dbgen/*.tbl` as input data. -- Converting “star schema” to de-normalized “flat schema” - -Set up database with IAA Deflate codec - -``` bash -$ cd ./database_dir/deflate -$ [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -$ [CLICKHOUSE_EXE] client -``` -Complete three steps same as lz4 above - -Set up database with ZSTD codec - -``` bash -$ cd ./database_dir/zstd -$ [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -$ [CLICKHOUSE_EXE] client -``` -Complete three steps same as lz4 above - -[self-check] -For each codec(lz4/zstd/deflate), please execute below query to make sure the databases are created successfully: -```sql -select count() from lineorder_flat -``` -You are expected to see below output: -```sql -┌───count()─┐ -│ 119994608 │ -└───────────┘ -``` -[Self-check for IAA Deflate codec] - -At the first time you execute insertion or query from client, clickhouse server console is expected to print this log: -```text -Hardware-assisted DeflateQpl codec is ready! -``` -If you never find this, but see another log as below: -```text -Initialization of hardware-assisted DeflateQpl codec failed -``` -That means IAA devices is not ready, you need check IAA setup again. - -## Benchmark with single instance - -- Before start benchmark, Please disable C6 and set CPU frequency governor to be `performance` - -``` bash -$ cpupower idle-set -d 3 -$ cpupower frequency-set -g performance -``` - -- To eliminate impact of memory bound on cross sockets, we use `numactl` to bind server on one socket and client on another socket. -- Single instance means single server connected with single client - -Now run benchmark for LZ4/Deflate/ZSTD respectively: - -LZ4: - -``` bash -$ cd ./database_dir/lz4 -$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log -``` - -IAA deflate: - -``` bash -$ cd ./database_dir/deflate -$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log -``` - -ZSTD: - -``` bash -$ cd ./database_dir/zstd -$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > zstd.log -``` - -Now three logs should be output as expected: -```text -lz4.log -deflate.log -zstd.log -``` - -How to check performance metrics: - -We focus on QPS, please search the keyword: `QPS_Final` and collect statistics - -## Benchmark with multi-instances - -- To reduce impact of memory bound on too much threads, We recommend run benchmark with multi-instances. -- Multi-instance means multiple(2 or 4)servers connected with respective client. -- The cores of one socket need to be divided equally and assigned to the servers respectively. -- For multi-instances, must create new folder for each codec and insert dataset by following the similar steps as single instance. - -There are 2 differences: -- For client side, you need launch clickhouse with the assigned port during table creation and data insertion. -- For server side, you need launch clickhouse with the specific xml config file in which port has been assigned. All customized xml config files for multi-instances has been provided under ./server_config. - -Here we assume there are 60 cores per socket and take 2 instances for example. -Launch server for first instance -LZ4: - -``` bash -$ cd ./database_dir/lz4 -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -``` - -ZSTD: - -``` bash -$ cd ./database_dir/zstd -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -``` - -IAA Deflate: - -``` bash -$ cd ./database_dir/deflate -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -``` - -[Launch server for second instance] - -LZ4: - -``` bash -$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2 -$ cp ../../server_config/config_lz4_s2.xml ./ -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& -``` - -ZSTD: - -``` bash -$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2 -$ cp ../../server_config/config_zstd_s2.xml ./ -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& -``` - -IAA Deflate: - -``` bash -$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2 -$ cp ../../server_config/config_deflate_s2.xml ./ -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null& -``` - -Creating tables && Inserting data for second instance - -Creating tables: - -``` bash -$ [CLICKHOUSE_EXE] client -m --port=9001 -``` - -Inserting data: - -``` bash -$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001 -``` - -- [TBL_FILE_NAME] represents the name of a file named with the regular expression: *. tbl under `./benchmark_sample/rawdata_dir/ssb-dbgen`. -- `--port=9001` stands for the assigned port for server instance which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, you need replace it with the value: 9002/9003 which stand for s3/s4 instance respectively. If you don't assign it, the port is 9000 by default which has been used by first instance. - -Benchmarking with 2 instances - -LZ4: - -``` bash -$ cd ./database_dir/lz4 -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -$ cd ./database_dir/lz4_s2 -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log -``` - -ZSTD: - -``` bash -$ cd ./database_dir/zstd -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -$ cd ./database_dir/zstd_s2 -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log -``` - -IAA deflate - -``` bash -$ cd ./database_dir/deflate -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -$ cd ./database_dir/deflate_s2 -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log -``` - -Here the last argument: `2` of client_stressing_test.py stands for the number of instances. For more instances, you need replace it with the value: 3 or 4. This script support up to 4 instances/ - -Now three logs should be output as expected: - -``` text -lz4_2insts.log -deflate_2insts.log -zstd_2insts.log -``` -How to check performance metrics: - -We focus on QPS, please search the keyword: `QPS_Final` and collect statistics - -Benchmark setup for 4 instances is similar with 2 instances above. -We recommend use 2 instances benchmark data as final report for review. - -## Tips - -Each time before launch new clickhouse server, please make sure no background clickhouse process running, please check and kill old one: - -``` bash -$ ps -aux| grep clickhouse -$ kill -9 [PID] -``` -By comparing the query list in ./client_scripts/queries_ssb.sql with official [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema), you will find 3 queries are not included: Q1.2/Q1.3/Q3.4 . This is because cpu utilization% is very low <10% for these queries which means cannot demonstrate performance differences. diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index 050bb495024..819f16cfd64 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -80,7 +80,6 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) ("block-size,b", po::value()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") ("hc", "use LZ4HC instead of LZ4") ("zstd", "use ZSTD instead of LZ4") - ("deflate_qpl", "use deflate_qpl instead of LZ4") ("codec", po::value>()->multitoken(), "use codecs combination instead of LZ4") ("level", po::value(), "compression level for codecs specified via flags") ("none", "use no compression instead of LZ4") @@ -107,7 +106,6 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) bool decompress = options.count("decompress"); bool use_lz4hc = options.count("hc"); bool use_zstd = options.count("zstd"); - bool use_deflate_qpl = options.count("deflate_qpl"); bool stat_mode = options.count("stat"); bool use_none = options.count("none"); print_stacktrace = options.count("stacktrace"); @@ -116,7 +114,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) if (options.count("codec")) codecs = options["codec"].as>(); - if ((use_lz4hc || use_zstd || use_deflate_qpl || use_none) && !codecs.empty()) + if ((use_lz4hc || use_zstd || use_none) && !codecs.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong options, codec flags like --zstd and --codec options are mutually exclusive"); if (!codecs.empty() && options.count("level")) @@ -128,8 +126,6 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) method_family = "LZ4HC"; else if (use_zstd) method_family = "ZSTD"; - else if (use_deflate_qpl) - method_family = "DEFLATE_QPL"; else if (use_none) method_family = "NONE"; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 22d20fc82ce..39499cc577d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -359,12 +359,6 @@ set_source_files_properties( Columns/ColumnString.cpp PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") -if (ENABLE_QPL) - set_source_files_properties( - Compression/CompressionCodecDeflateQpl.cpp - PROPERTIES COMPILE_FLAGS "-mwaitpkg") -endif () - target_link_libraries(clickhouse_common_io PUBLIC boost::program_options @@ -591,15 +585,8 @@ endif () target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) -if (TARGET ch_contrib::qpl) - dbms_target_link_libraries(PUBLIC ch_contrib::qpl) - target_link_libraries (clickhouse_compression PUBLIC ch_contrib::qpl) - target_link_libraries (clickhouse_compression PUBLIC ch_contrib::accel-config) -endif () - -if (TARGET ch_contrib::accel-config AND TARGET ch_contrib::qatzstd_plugin) +if (TARGET ch_contrib::qatzstd_plugin) dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin) - dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin) endif () diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 416bb2f0b15..667db913630 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -55,7 +55,6 @@ namespace Setting { extern const SettingsBool allow_experimental_codecs; extern const SettingsBool allow_suspicious_codecs; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsString network_compression_method; extern const SettingsInt64 network_zstd_compression_level; @@ -811,7 +810,6 @@ void Connection::sendQuery( level, !(*settings)[Setting::allow_suspicious_codecs], (*settings)[Setting::allow_experimental_codecs], - (*settings)[Setting::enable_deflate_qpl_codec], (*settings)[Setting::enable_zstd_qat_codec]); compression_codec = CompressionCodecFactory::instance().get(method, level); } diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 86ac054a62c..9d80e9845f4 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -32,7 +32,6 @@ #cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN -#cmakedefine01 USE_QPL #cmakedefine01 USE_QATLIB #cmakedefine01 USE_LIBURING #cmakedefine01 USE_AVRO diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 2b65f2d690c..22f19139a5f 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -317,18 +317,6 @@ void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_d codec->decompress(compressed_buffer, static_cast(size_compressed_without_checksum), to.begin()); } -void CompressedReadBufferBase::flushAsynchronousDecompressRequests() const -{ - if (codec) - codec->flushAsynchronousDecompressRequests(); -} - -void CompressedReadBufferBase::setDecompressMode(ICompressionCodec::CodecMode mode) const -{ - if (codec) - codec->setDecompressMode(mode); -} - /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_, bool external_data_) : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_), external_data(external_data_) diff --git a/src/Compression/CompressedReadBufferBase.h b/src/Compression/CompressedReadBufferBase.h index 4a164a6ce68..b15d05f7e80 100644 --- a/src/Compression/CompressedReadBufferBase.h +++ b/src/Compression/CompressedReadBufferBase.h @@ -64,14 +64,6 @@ protected: /// It is more efficient for compression codec NONE but not suitable if you want to decompress into specific location. void decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum); - /// Flush all asynchronous decompress request. - void flushAsynchronousDecompressRequests() const; - - /// Set decompression mode: Synchronous/Asynchronous/SoftwareFallback. - /// The mode is "Synchronous" by default. - /// flushAsynchronousDecompressRequests must be called subsequently once set "Asynchronous" mode. - void setDecompressMode(ICompressionCodec::CodecMode mode) const; - public: /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. explicit CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false, bool external_data_ = false); diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 9dc40b8217c..0589f47cf86 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -90,8 +90,6 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_t bytes_read = 0; - /// The codec mode is only relevant for codecs which support hardware offloading. - ICompressionCodec::CodecMode decompress_mode = ICompressionCodec::CodecMode::Synchronous; bool read_tail = false; /// If there are unread bytes in the buffer, then we copy needed to `to`. @@ -104,28 +102,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) size_t size_decompressed = 0; size_t size_compressed_without_checksum = 0; - ///Try to read block which is entirely located in a single 'compressed_in->' buffer. - size_t new_size_compressed = readCompressedDataBlockForAsynchronous(size_decompressed, size_compressed_without_checksum); - - if (new_size_compressed) - { - /// Current block is entirely located in a single 'compressed_in->' buffer. - /// We can set asynchronous decompression mode if supported to boost performance. - decompress_mode = ICompressionCodec::CodecMode::Asynchronous; - } - else - { - /// Current block cannot be decompressed asynchronously, means it probably span across two compressed_in buffers. - /// Meanwhile, asynchronous requests for previous blocks should be flushed if any. - flushAsynchronousDecompressRequests(); - /// Fallback to generic API - new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); - decompress_mode = ICompressionCodec::CodecMode::Synchronous; - } - size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer. - - if (!new_size_compressed) - break; + size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); @@ -133,7 +110,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) /// need to skip some bytes in decompressed data (seek happened before readBig call). if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) { - setDecompressMode(decompress_mode); decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; bytes += size_decompressed; @@ -148,8 +124,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); - /// Synchronous mode must be set since we need read partial data immediately from working buffer to target buffer. - setDecompressMode(ICompressionCodec::CodecMode::Synchronous); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); /// Read partial data from first block. Won't run here at second block. @@ -168,17 +142,12 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); - // Asynchronous mode can be set here because working_buffer wouldn't be overwritten any more since this is the last block. - setDecompressMode(ICompressionCodec::CodecMode::Asynchronous); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); read_tail = true; break; } } - /// Here we must make sure all asynchronous requests above are completely done. - flushAsynchronousDecompressRequests(); - if (read_tail) { /// Manually take nextimpl_working_buffer_offset into account, because we don't use diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp deleted file mode 100644 index 30085762c00..00000000000 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ /dev/null @@ -1,490 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if USE_QPL - -#include "libaccel_config.h" - -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int CANNOT_COMPRESS; - extern const int CANNOT_DECOMPRESS; -} - -DeflateQplJobHWPool & DeflateQplJobHWPool::instance() -{ - static DeflateQplJobHWPool pool; - return pool; -} - -DeflateQplJobHWPool::DeflateQplJobHWPool() - : max_hw_jobs(0) - , random_engine(randomSeed()) -{ - LoggerPtr log = getLogger("DeflateQplJobHWPool"); - const char * qpl_version = qpl_get_library_version(); - - // loop all configured workqueue size to get maximum job number. - accfg_ctx * ctx_ptr = nullptr; - auto ctx_status = accfg_new(&ctx_ptr); - SCOPE_EXIT({ accfg_unref(ctx_ptr); }); - if (ctx_status == 0) - { - auto * dev_ptr = accfg_device_get_first(ctx_ptr); - while (dev_ptr != nullptr) - { - for (auto * wq_ptr = accfg_wq_get_first(dev_ptr); wq_ptr != nullptr; wq_ptr = accfg_wq_get_next(wq_ptr)) - max_hw_jobs += accfg_wq_get_size(wq_ptr); - dev_ptr = accfg_device_get_next(dev_ptr); - } - } - else - { - job_pool_ready = false; - LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Failed to create new libaccel_config context -> status: {}, QPL Version: {}.", ctx_status, qpl_version); - return; - } - - if (max_hw_jobs == 0) - { - job_pool_ready = false; - LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Failed to get available workqueue size -> total_wq_size: {}, QPL Version: {}.", max_hw_jobs, qpl_version); - return; - } - distribution = std::uniform_int_distribution(0, max_hw_jobs - 1); - /// Get size required for saving a single qpl job object - qpl_get_job_size(qpl_path_hardware, &per_job_size); - /// Allocate job buffer pool for storing all job objects - hw_jobs_buffer = std::make_unique(per_job_size * max_hw_jobs); - hw_job_ptr_locks = std::make_unique(max_hw_jobs); - /// Initialize all job objects in job buffer pool - for (UInt32 index = 0; index < max_hw_jobs; ++index) - { - qpl_job * job_ptr = reinterpret_cast(hw_jobs_buffer.get() + index * per_job_size); - if (auto status = qpl_init_job(qpl_path_hardware, job_ptr); status != QPL_STS_OK) - { - job_pool_ready = false; - LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Failed to Initialize qpl job -> status: {}, QPL Version: {}.", static_cast(status), qpl_version); - return; - } - unLockJob(index); - } - - job_pool_ready = true; - LOG_DEBUG(log, "Hardware-assisted DeflateQpl codec is ready! QPL Version: {}, max_hw_jobs: {}",qpl_version, max_hw_jobs); -} - -DeflateQplJobHWPool::~DeflateQplJobHWPool() -{ - for (UInt32 i = 0; i < max_hw_jobs; ++i) - { - qpl_job * job_ptr = reinterpret_cast(hw_jobs_buffer.get() + i * per_job_size); - while (!tryLockJob(i)); - qpl_fini_job(job_ptr); - unLockJob(i); - } - job_pool_ready = false; -} - -qpl_job * DeflateQplJobHWPool::acquireJob(UInt32 & job_id) -{ - if (isJobPoolReady()) - { - UInt32 retry = 0; - UInt32 index = distribution(random_engine); - while (!tryLockJob(index)) - { - index = distribution(random_engine); - retry++; - if (retry > max_hw_jobs) - { - return nullptr; - } - } - job_id = max_hw_jobs - index; - assert(index < max_hw_jobs); - return reinterpret_cast(hw_jobs_buffer.get() + index * per_job_size); - } - return nullptr; -} - -void DeflateQplJobHWPool::releaseJob(UInt32 job_id) -{ - if (isJobPoolReady()) - unLockJob(max_hw_jobs - job_id); -} - -bool DeflateQplJobHWPool::tryLockJob(UInt32 index) -{ - bool expected = false; - assert(index < max_hw_jobs); - return hw_job_ptr_locks[index].compare_exchange_strong(expected, true); -} - -void DeflateQplJobHWPool::unLockJob(UInt32 index) -{ - assert(index < max_hw_jobs); - hw_job_ptr_locks[index].store(false); -} - -HardwareCodecDeflateQpl::HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_) - : log(getLogger("HardwareCodecDeflateQpl")) - , sw_codec(sw_codec_) -{ -} - -HardwareCodecDeflateQpl::~HardwareCodecDeflateQpl() -{ -#ifndef NDEBUG - assert(decomp_async_job_map.empty()); -#else - if (!decomp_async_job_map.empty()) - { - LOG_WARNING(log, "Find un-released job when HardwareCodecDeflateQpl destroy"); - for (auto it : decomp_async_job_map) - { - DeflateQplJobHWPool::instance().releaseJob(it.first); - } - decomp_async_job_map.clear(); - } -#endif -} - -Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const -{ - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - UInt32 compressed_size = 0; - if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) - { - LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->acquireJob fail, probably job pool exhausted)"); - return RET_ERROR; - } - - job_ptr->op = qpl_op_compress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->level = qpl_default_level; - job_ptr->available_out = dest_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; - - auto status = qpl_execute_job(job_ptr); - if (status == QPL_STS_OK) - { - compressed_size = job_ptr->total_out; - DeflateQplJobHWPool::instance().releaseJob(job_id); - return compressed_size; - } - - LOG_WARNING( - log, - "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->qpl_execute_job with error code: {} - please " - "refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - DeflateQplJobHWPool::instance().releaseJob(job_id); - return RET_ERROR; -} - -Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) -{ - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - UInt32 decompressed_size = 0; - if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) - { - LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)"); - return RET_ERROR; - } - - // Performing a decompression operation - job_ptr->op = qpl_op_decompress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = uncompressed_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; - - auto status = qpl_submit_job(job_ptr); - if (status != QPL_STS_OK) - { - DeflateQplJobHWPool::instance().releaseJob(job_id); - LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast(status)); - return RET_ERROR; - } - /// Busy waiting till job complete. - do - { - _tpause(1, __rdtsc() + 1000); - status = qpl_check_job(job_ptr); - } while (status == QPL_STS_BEING_PROCESSED); - - if (status != QPL_STS_OK) - { - DeflateQplJobHWPool::instance().releaseJob(job_id); - LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast(status)); - return RET_ERROR; - } - - decompressed_size = job_ptr->total_out; - DeflateQplJobHWPool::instance().releaseJob(job_id); - return decompressed_size; -} - -Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) -{ - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) - { - LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)"); - return RET_ERROR; - } - - // Performing a decompression operation - job_ptr->op = qpl_op_decompress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = uncompressed_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; - - auto status = qpl_submit_job(job_ptr); - if (status == QPL_STS_OK) - { - decomp_async_job_map.insert({job_id, job_ptr}); - return job_id; - } - - DeflateQplJobHWPool::instance().releaseJob(job_id); - LOG_WARNING( - log, - "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataAsynchronous->qpl_submit_job with error code: {} " - "- please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - return RET_ERROR; -} - -void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests() -{ - auto n_jobs_processing = decomp_async_job_map.size(); - std::map::iterator it = decomp_async_job_map.begin(); - - while (n_jobs_processing) - { - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - job_id = it->first; - job_ptr = it->second; - - auto status = qpl_check_job(job_ptr); - if (status == QPL_STS_BEING_PROCESSED) - { - it++; - } - else - { - if (status != QPL_STS_OK) - { - sw_codec.doDecompressData( - reinterpret_cast(job_ptr->next_in_ptr), - job_ptr->available_in, - reinterpret_cast(job_ptr->next_out_ptr), - job_ptr->available_out); - LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: flushAsynchronousDecompressRequests with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast(status)); - } - it = decomp_async_job_map.erase(it); - DeflateQplJobHWPool::instance().releaseJob(job_id); - n_jobs_processing--; - if (n_jobs_processing <= 0) - break; - } - - if (it == decomp_async_job_map.end()) - { - it = decomp_async_job_map.begin(); - _tpause(1, __rdtsc() + 1000); - } - } -} - -SoftwareCodecDeflateQpl::~SoftwareCodecDeflateQpl() -{ - if (!sw_job) - qpl_fini_job(sw_job); -} - -qpl_job * SoftwareCodecDeflateQpl::getJobCodecPtr() -{ - if (!sw_job) - { - UInt32 size = 0; - qpl_get_job_size(qpl_path_software, &size); - - sw_buffer = std::make_unique(size); - sw_job = reinterpret_cast(sw_buffer.get()); - - // Job initialization - if (auto status = qpl_init_job(qpl_path_software, sw_job); status != QPL_STS_OK) - throw Exception(ErrorCodes::CANNOT_COMPRESS, - "Initialization of DeflateQpl software fallback codec failed. " - "(Details: qpl_init_job with error code: " - "{} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - } - return sw_job; -} - -UInt32 SoftwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) -{ - qpl_job * job_ptr = getJobCodecPtr(); - // Performing a compression operation - job_ptr->op = qpl_op_compress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = dest_size; - job_ptr->level = qpl_default_level; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; - - if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK) - throw Exception(ErrorCodes::CANNOT_COMPRESS, - "Execution of DeflateQpl software fallback codec failed. " - "(Details: qpl_execute_job with error code: " - "{} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - - return job_ptr->total_out; -} - -void SoftwareCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) -{ - qpl_job * job_ptr = getJobCodecPtr(); - - // Performing a decompression operation - job_ptr->op = qpl_op_decompress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = uncompressed_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; - - if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, - "Execution of DeflateQpl software fallback codec failed. " - "(Details: qpl_execute_job with error code: " - "{} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); -} - -CompressionCodecDeflateQpl::CompressionCodecDeflateQpl() - : sw_codec(std::make_unique()) - , hw_codec(std::make_unique(*sw_codec)) -{ - setCodecDescription("DEFLATE_QPL"); -} - -uint8_t CompressionCodecDeflateQpl::getMethodByte() const -{ - return static_cast(CompressionMethodByte::DeflateQpl); -} - -void CompressionCodecDeflateQpl::updateHash(SipHash & hash) const -{ - getCodecDesc()->updateTreeHash(hash, /*ignore_aliases=*/ true); -} - -UInt32 CompressionCodecDeflateQpl::getMaxCompressedDataSize(UInt32 uncompressed_size) const -{ - /// Aligned with ZLIB - return ((uncompressed_size) + ((uncompressed_size) >> 12) + ((uncompressed_size) >> 14) + ((uncompressed_size) >> 25) + 13); -} - -UInt32 CompressionCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ -/// QPL library is using AVX-512 with some shuffle operations. -/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. - __msan_unpoison(dest, getMaxCompressedDataSize(source_size)); - Int32 res = HardwareCodecDeflateQpl::RET_ERROR; - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - res = hw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size)); - if (res == HardwareCodecDeflateQpl::RET_ERROR) - res = sw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size)); - return res; -} - -inline void touchBufferWithZeroFilling(char * buffer, UInt32 buffer_size) -{ - for (char * p = buffer; p < buffer + buffer_size; p += ::getPageSize()/(sizeof(*p))) - { - *p = 0; - } -} - -void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const -{ -/// QPL library is using AVX-512 with some shuffle operations. -/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. - __msan_unpoison(dest, uncompressed_size); -/// Device IOTLB miss has big perf. impact for IAA accelerators. -/// To avoid page fault, we need touch buffers related to accelerator in advance. - touchBufferWithZeroFilling(dest, uncompressed_size); - - switch (getDecompressMode()) - { - case CodecMode::Synchronous: - { - Int32 res = HardwareCodecDeflateQpl::RET_ERROR; - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - { - res = hw_codec->doDecompressDataSynchronous(source, source_size, dest, uncompressed_size); - if (res == HardwareCodecDeflateQpl::RET_ERROR) - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - } - else - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; - } - case CodecMode::Asynchronous: - { - Int32 res = HardwareCodecDeflateQpl::RET_ERROR; - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - res = hw_codec->doDecompressDataAsynchronous(source, source_size, dest, uncompressed_size); - if (res == HardwareCodecDeflateQpl::RET_ERROR) - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; - } - case CodecMode::SoftwareFallback: - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; - } -} - -void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() -{ - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - hw_codec->flushAsynchronousDecompressRequests(); - /// After flush previous all async requests, we must restore mode to be synchronous by default. - setDecompressMode(CodecMode::Synchronous); -} -void registerCodecDeflateQpl(CompressionCodecFactory & factory) -{ - factory.registerSimpleCompressionCodec( - "DEFLATE_QPL", static_cast(CompressionMethodByte::DeflateQpl), [&]() { return std::make_shared(); }); -} -} -#endif diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h deleted file mode 100644 index d9abc0fb7e0..00000000000 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ /dev/null @@ -1,125 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "config.h" - -#if USE_QPL - -#include - -namespace Poco -{ -class Logger; -} - -namespace DB -{ - -/// DeflateQplJobHWPool is resource pool to provide the job objects. -/// Job object is used for storing context information during offloading compression job to HW Accelerator. -class DeflateQplJobHWPool -{ -public: - DeflateQplJobHWPool(); - ~DeflateQplJobHWPool(); - - static DeflateQplJobHWPool & instance(); - - qpl_job * acquireJob(UInt32 & job_id); - void releaseJob(UInt32 job_id); - const bool & isJobPoolReady() const { return job_pool_ready; } - -private: - bool tryLockJob(UInt32 index); - void unLockJob(UInt32 index); - - /// size of each job objects - UInt32 per_job_size; - /// Maximum jobs running in parallel supported by IAA hardware - UInt32 max_hw_jobs; - /// Entire buffer for storing all job objects - std::unique_ptr hw_jobs_buffer; - /// Locks for accessing each job object pointers - std::unique_ptr hw_job_ptr_locks; - - bool job_pool_ready; - pcg64_fast random_engine; - std::uniform_int_distribution distribution; -}; - -class SoftwareCodecDeflateQpl -{ -public: - ~SoftwareCodecDeflateQpl(); - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size); - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); - -private: - qpl_job * sw_job = nullptr; - std::unique_ptr sw_buffer; - - qpl_job * getJobCodecPtr(); -}; - -class HardwareCodecDeflateQpl -{ -public: - /// RET_ERROR stands for hardware codec fail, needs fallback to software codec. - static constexpr Int32 RET_ERROR = -1; - - explicit HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_); - ~HardwareCodecDeflateQpl(); - - Int32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const; - - /// Submit job request to the IAA hardware and then busy waiting till it complete. - Int32 doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); - - /// Submit job request to the IAA hardware and return immediately. IAA hardware will process decompression jobs automatically. - Int32 doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); - - /// Flush result for all previous requests which means busy waiting till all the jobs in "decomp_async_job_map" are finished. - /// Must be called subsequently after several calls of doDecompressDataReq. - void flushAsynchronousDecompressRequests(); - -private: - /// Asynchronous job map for decompression: job ID - job object. - /// For each submission, push job ID && job object into this map; - /// For flush, pop out job ID && job object from this map. Use job ID to release job lock and use job object to check job status till complete. - std::map decomp_async_job_map; - LoggerPtr log; - /// Provides a fallback in case of errors. - SoftwareCodecDeflateQpl & sw_codec; -}; - -class CompressionCodecDeflateQpl final : public ICompressionCodec -{ -public: - CompressionCodecDeflateQpl(); - uint8_t getMethodByte() const override; - void updateHash(SipHash & hash) const override; - -protected: - bool isCompression() const override { return true; } - bool isGenericCompression() const override { return true; } - bool isDeflateQpl() const override { return true; } - - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - - /// Flush result for previous asynchronous decompression requests on asynchronous mode. - void flushAsynchronousDecompressRequests() override; - -private: - UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; - - std::unique_ptr sw_codec; - std::unique_ptr hw_codec; -}; - -} -#endif diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index fb4581f22b4..c8ad3d71376 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -176,9 +176,6 @@ void registerCodecZSTD(CompressionCodecFactory & factory); void registerCodecZSTDQAT(CompressionCodecFactory & factory); #endif void registerCodecMultiple(CompressionCodecFactory & factory); -#if USE_QPL -void registerCodecDeflateQpl(CompressionCodecFactory & factory); -#endif /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build @@ -206,9 +203,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); -#if USE_QPL - registerCodecDeflateQpl(*this); -#endif registerCodecGCD(*this); default_codec = get("LZ4", {}); diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index 2885f35d7bd..64d454d3e86 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,10 +40,10 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const; + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_zstd_qat_codec) const; /// Validate codecs AST specified by user - void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const; + void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_zstd_qat_codec) const; /// Get codec by AST and possible column_type. Some codecs can use /// information about type to improve inner settings, but every codec should diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index a54169d4524..09eb2cf3844 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes void CompressionCodecFactory::validateCodec( - const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const + const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_zstd_qat_codec) const { if (family_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec name cannot be empty"); @@ -43,13 +43,13 @@ void CompressionCodecFactory::validateCodec( { auto literal = std::make_shared(static_cast(*level)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)), - {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); + {}, sanity_check, allow_experimental_codecs, enable_zstd_qat_codec); } else { auto identifier = std::make_shared(Poco::toUpper(family_name)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier), - {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); + {}, sanity_check, allow_experimental_codecs, enable_zstd_qat_codec); } } @@ -77,7 +77,7 @@ bool innerDataTypeIsFloat(const DataTypePtr & type) } ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_zstd_qat_codec) const { if (const auto * func = ast->as()) { @@ -159,12 +159,6 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( " You can enable it with the 'allow_experimental_codecs' setting.", codec_family_name); - if (!enable_deflate_qpl_codec && result_codec->isDeflateQpl()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Codec {} is disabled by default." - " You can enable it with the 'enable_deflate_qpl_codec' setting.", - codec_family_name); - if (!enable_zstd_qat_codec && result_codec->isZstdQat()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec {} is disabled by default." diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index ee4b3e38653..f01661cbe1d 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -46,7 +46,6 @@ enum class CompressionMethodByte : uint8_t AES_128_GCM_SIV = 0x96, AES_256_GCM_SIV = 0x97, FPC = 0x98, - DeflateQpl = 0x99, GCD = 0x9a, ZSTD_QPL = 0x9b, }; diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index f77b1323d2e..549817cb0b9 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -47,37 +47,9 @@ public: /// Decompress bytes from compressed source to dest. Dest should preallocate memory; UInt32 decompress(const char * source, UInt32 source_size, char * dest) const; - /// Three kinds of codec mode: - /// Synchronous mode which is commonly used by default; - /// --- For the codec with HW decompressor, it means submit request to HW and busy wait till complete. - /// Asynchronous mode which required HW decompressor support; - /// --- For the codec with HW decompressor, it means submit request to HW and return immediately. - /// --- Must be used in pair with flushAsynchronousDecompressRequests. - /// SoftwareFallback mode is exclusively defined for the codec with HW decompressor, enable its capability of "fallback to SW codec". - enum class CodecMode : uint8_t - { - Synchronous, - Asynchronous, - SoftwareFallback - }; - - /// Get current decompression mode - CodecMode getDecompressMode() const{ return decompressMode; } - - /// if set mode to CodecMode::Asynchronous, must be followed with flushAsynchronousDecompressRequests - void setDecompressMode(CodecMode mode) { decompressMode = mode; } - /// Report decompression errors as CANNOT_DECOMPRESS, not CORRUPTED_DATA void setExternalDataFlag() { decompression_error_code = ErrorCodes::CANNOT_DECOMPRESS; } - /// Flush result for previous asynchronous decompression requests. - /// This function must be called following several requests offload to HW. - /// To make sure asynchronous results have been flushed into target buffer completely. - /// Meanwhile, source and target buffer for decompression can not be overwritten until this function execute completely. - /// Otherwise it would conflict with HW offloading and cause exception. - /// For QPL deflate, it support the maximum number of requests equal to DeflateQplJobHWPool::jobPoolSize - virtual void flushAsynchronousDecompressRequests(){} - /// Number of bytes, that will be used to compress uncompressed_size bytes with current codec virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { @@ -118,9 +90,6 @@ public: /// It will not be allowed to use unless the user will turn off the safety switch. virtual bool isExperimental() const { return false; } - /// Is this the DEFLATE_QPL codec? - virtual bool isDeflateQpl() const { return false; } - /// Is this the ZSTD_QAT codec? virtual bool isZstdQat() const { return false; } @@ -147,7 +116,6 @@ protected: private: ASTPtr full_codec_desc; - CodecMode decompressMode{CodecMode::Synchronous}; }; using CompressionCodecPtr = std::shared_ptr; diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 307cc5b9182..97e71b143e7 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -2131,12 +2131,7 @@ If it is set to true, then a user is allowed to executed distributed DDL queries If it is set to true, allow to specify meaningless compression codecs. )", 0) \ M(Bool, enable_deflate_qpl_codec, false, R"( -If turned on, the DEFLATE_QPL codec may be used to compress columns. - -Possible values: - -- 0 - Disabled -- 1 - Enabled +Obsolete setting, does nothing. )", 0) \ M(Bool, enable_zstd_qat_codec, false, R"( If turned on, the ZSTD_QAT codec may be used to compress columns. diff --git a/src/Databases/enableAllExperimentalSettings.cpp b/src/Databases/enableAllExperimentalSettings.cpp index d2a3ecfe05f..d1b3b776370 100644 --- a/src/Databases/enableAllExperimentalSettings.cpp +++ b/src/Databases/enableAllExperimentalSettings.cpp @@ -40,7 +40,6 @@ void enableAllExperimentalSettings(ContextMutablePtr context) context->setSetting("allow_suspicious_primary_key", 1); context->setSetting("allow_suspicious_ttl_expressions", 1); context->setSetting("allow_suspicious_variant_types", 1); - context->setSetting("enable_deflate_qpl_codec", 1); context->setSetting("enable_zstd_qat_codec", 1); context->setSetting("allow_create_index_without_type", 1); context->setSetting("allow_experimental_s3queue", 1); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6057afefd02..22bba01a60f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -129,7 +129,6 @@ namespace Setting extern const SettingsDefaultTableEngine default_temporary_table_engine; extern const SettingsString default_view_definer; extern const SettingsUInt64 distributed_ddl_entry_format_version; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsBool flatten_nested; extern const SettingsBool fsync_metadata; @@ -667,7 +666,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( bool skip_checks = LoadingStrictnessLevel::SECONDARY_CREATE <= mode; bool sanity_check_compression_codecs = !skip_checks && !context_->getSettingsRef()[Setting::allow_suspicious_codecs]; bool allow_experimental_codecs = skip_checks || context_->getSettingsRef()[Setting::allow_experimental_codecs]; - bool enable_deflate_qpl_codec = skip_checks || context_->getSettingsRef()[Setting::enable_deflate_qpl_codec]; bool enable_zstd_qat_codec = skip_checks || context_->getSettingsRef()[Setting::enable_zstd_qat_codec]; ColumnsDescription res; @@ -729,7 +727,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "ALIAS") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); + col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_zstd_qat_codec); } if (col_decl.statistics_desc) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f18c9f1cb95..921c53b6bcb 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -78,7 +78,6 @@ namespace Setting extern const SettingsUInt64 async_insert_max_data_size; extern const SettingsBool calculate_text_stack_trace; extern const SettingsBool deduplicate_blocks_in_dependent_materialized_views; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsUInt64 idle_connection_timeout; extern const SettingsBool input_format_defaults_for_omitted_fields; @@ -2238,7 +2237,6 @@ void TCPHandler::initBlockOutput(const Block & block) level, !query_settings[Setting::allow_suspicious_codecs], query_settings[Setting::allow_experimental_codecs], - query_settings[Setting::enable_deflate_qpl_codec], query_settings[Setting::enable_zstd_qat_codec]); state.maybe_compressed_out = std::make_shared( diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 7c328526ab7..ab4403b3a94 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -34,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -43,6 +41,7 @@ #include + namespace DB { namespace Setting @@ -51,7 +50,6 @@ namespace Setting extern const SettingsBool allow_experimental_codecs; extern const SettingsBool allow_suspicious_codecs; extern const SettingsBool allow_suspicious_ttl_expressions; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsBool flatten_nested; } @@ -497,7 +495,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) column.comment = *comment; if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true); column.ttl = ttl; @@ -566,7 +564,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) else { if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true, true, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true, true); if (comment) column.comment = *comment; @@ -1381,7 +1379,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const command.data_type, !settings[Setting::allow_suspicious_codecs], settings[Setting::allow_experimental_codecs], - settings[Setting::enable_deflate_qpl_codec], settings[Setting::enable_zstd_qat_codec]); } @@ -1412,7 +1409,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const command.data_type, !context->getSettingsRef()[Setting::allow_suspicious_codecs], context->getSettingsRef()[Setting::allow_experimental_codecs], - context->getSettingsRef()[Setting::enable_deflate_qpl_codec], context->getSettingsRef()[Setting::enable_zstd_qat_codec]); } auto column_default = all_columns.getDefault(column_name); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 3922f1cfcfb..b96c620592d 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -215,7 +215,7 @@ void ColumnDescription::readText(ReadBuffer & buf) comment = col_ast->comment->as().value.safeGet(); if (col_ast->codec) - codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true, true); + codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true); if (col_ast->ttl) ttl = col_ast->ttl; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 5bc3fcc5be3..4eec704fdd5 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -60,7 +60,6 @@ namespace Setting extern const SettingsBool allow_suspicious_codecs; extern const SettingsMilliseconds distributed_background_insert_sleep_time_ms; extern const SettingsBool distributed_insert_skip_read_only_replicas; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsBool insert_allow_materialized_columns; extern const SettingsBool insert_distributed_one_random_shard; @@ -799,7 +798,6 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const compression_level, !settings[Setting::allow_suspicious_codecs], settings[Setting::allow_experimental_codecs], - settings[Setting::enable_deflate_qpl_codec], settings[Setting::enable_zstd_qat_codec]); CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 6daad8488ff..4845984cc88 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -30,7 +30,6 @@ namespace Setting extern const SettingsBool allow_suspicious_codecs; extern const SettingsBool allow_suspicious_ttl_expressions; extern const SettingsBool enable_zstd_qat_codec; - extern const SettingsBool enable_deflate_qpl_codec; } namespace ErrorCodes @@ -349,7 +348,7 @@ TTLDescription TTLDescription::getTTLFromAST( { result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - ttl_element->recompression_codec, {}, !context->getSettingsRef()[Setting::allow_suspicious_codecs], context->getSettingsRef()[Setting::allow_experimental_codecs], context->getSettingsRef()[Setting::enable_deflate_qpl_codec], context->getSettingsRef()[Setting::enable_zstd_qat_codec]); + ttl_element->recompression_codec, {}, !context->getSettingsRef()[Setting::allow_suspicious_codecs], context->getSettingsRef()[Setting::allow_experimental_codecs], context->getSettingsRef()[Setting::enable_zstd_qat_codec]); } } diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c67f8d290b3..94a013d21dd 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -135,9 +135,6 @@ endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() -if (TARGET ch_contrib::qpl) - set(USE_QPL 1) -endif() if (TARGET ch_contrib::qatlib) set(USE_QATLIB 1) endif() diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 3b3a6bcadb5..6b8a1d86e05 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -19,7 +19,6 @@ def get_options(i: int, upgrade_check: bool) -> str: if i % 3 == 2 and not upgrade_check: options.append(f'''--db-engine="Replicated('/test/db/test_{i}', 's1', 'r1')"''') - client_options.append("enable_deflate_qpl_codec=1") client_options.append("enable_zstd_qat_codec=1") # If database name is not specified, new database is created for each functional test. diff --git a/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml b/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml deleted file mode 100644 index 2ad6a0f1eff..00000000000 --- a/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - 0 - 0 - - deflate_qpl - - - diff --git a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml deleted file mode 100644 index 24e101e0e3f..00000000000 --- a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index 29776eba176..bdff1d4fb20 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -37,19 +37,6 @@ node5 = cluster.add_instance( "configs/allow_suspicious_codecs.xml", ], ) -node6 = cluster.add_instance( - "node6", - main_configs=["configs/deflateqpl_compression_by_default.xml"], - user_configs=[ - "configs/allow_suspicious_codecs.xml", - "configs/enable_deflateqpl_codec.xml", - ], -) -node7 = cluster.add_instance( - "node7", - main_configs=["configs/allow_experimental_codecs.xml"], - user_configs=["configs/allow_suspicious_codecs.xml"], -) @pytest.fixture(scope="module") @@ -253,63 +240,3 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): ) == "10000\n" ) - - -def test_preconfigured_deflateqpl_codec(start_cluster): - if is_arm(): - pytest.skip( - "Skipping test because it's special test for Intel code (doesn't work on ARM)" - ) - - node6.query( - """ - CREATE TABLE compression_codec_multiple_with_key ( - somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), DEFLATE_QPL), - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, DEFLATE_QPL), - data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4, DEFLATE_QPL), - somecolumn Float64 - ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; - """ - ) - node6.query( - "INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, 'hello', 88.88), (toDate('2018-10-12'), 100002, 'world', 99.99), (toDate('2018-10-12'), 1111, '!', 777.777)" - ) - assert ( - node6.query( - "SELECT COUNT(*) FROM compression_codec_multiple_with_key WHERE id % 2 == 0" - ) - == "2\n" - ) - assert ( - node6.query( - "SELECT DISTINCT somecolumn FROM compression_codec_multiple_with_key ORDER BY id" - ) - == "777.777\n88.88\n99.99\n" - ) - assert ( - node6.query( - "SELECT data FROM compression_codec_multiple_with_key WHERE id >= 1112 AND somedate = toDate('2018-10-12') AND somecolumn <= 100" - ) - == "hello\nworld\n" - ) - - node6.query( - "INSERT INTO compression_codec_multiple_with_key SELECT toDate('2018-10-12'), number, toString(number), 1.0 FROM system.numbers LIMIT 10000" - ) - - assert ( - node6.query( - "SELECT COUNT(id) FROM compression_codec_multiple_with_key WHERE id % 10 == 0" - ) - == "1001\n" - ) - assert ( - node6.query("SELECT SUM(somecolumn) FROM compression_codec_multiple_with_key") - == str(777.777 + 88.88 + 99.99 + 1.0 * 10000) + "\n" - ) - assert ( - node6.query( - "SELECT count(*) FROM compression_codec_multiple_with_key GROUP BY somedate" - ) - == "10003\n" - ) diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference deleted file mode 100644 index a6e03404f2b..00000000000 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ /dev/null @@ -1,6 +0,0 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) -2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) -3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) -2 -10001 diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql deleted file mode 100644 index d8c28a7d9d7..00000000000 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ /dev/null @@ -1,49 +0,0 @@ ---Tags: no-fasttest, no-cpu-aarch64, no-cpu-s390x --- no-fasttest because DEFLATE_QPL isn't available in fasttest --- no-cpu-aarch64 and no-cpu-s390x because DEFLATE_QPL is x86-only - --- A bunch of random DDLs to test the DEFLATE_QPL codec. - -SET enable_deflate_qpl_codec = 1; - --- Suppress test failures because stderr contains warning "Initialization of hardware-assisted DeflateQpl failed, falling --- back to software DeflateQpl coded." -SET send_logs_level = 'fatal'; - -DROP TABLE IF EXISTS compression_codec; - -CREATE TABLE compression_codec( - id UInt64 CODEC(DEFLATE_QPL), - data String CODEC(DEFLATE_QPL), - ddd Date CODEC(DEFLATE_QPL), - ddd32 Date32 CODEC(DEFLATE_QPL), - somenum Float64 CODEC(DEFLATE_QPL), - somestr FixedString(3) CODEC(DEFLATE_QPL), - othernum Int64 CODEC(DEFLATE_QPL), - somearray Array(UInt8) CODEC(DEFLATE_QPL), - somemap Map(String, UInt32) CODEC(DEFLATE_QPL), - sometuple Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL), -) ENGINE = MergeTree() ORDER BY tuple(); - -SHOW CREATE TABLE compression_codec; - -INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5, [1,2,3], map('k1',1,'k2',2), tuple(1,2)); -INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6, [4,5,6], map('k3',3,'k4',4), tuple(3,4)); -INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6)); - -SELECT * FROM compression_codec ORDER BY id; - -OPTIMIZE TABLE compression_codec FINAL; - -INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), toDate32('2018-12-13'), 4.4, 'ddd', 8, [10,11,12], map('k7',7,'k8',8), tuple(7,8)); - -DETACH TABLE compression_codec; -ATTACH TABLE compression_codec; - -SELECT count(*) FROM compression_codec WHERE id = 2 GROUP BY id; - -INSERT INTO compression_codec SELECT 3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6) FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM compression_codec WHERE id = 3 GROUP BY id; - -DROP TABLE IF EXISTS compression_codec;