From 8d851457254cf7fe90edde613cffeeeb011495eb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Jun 2021 02:26:52 +0300 Subject: [PATCH] Remove experimental compression codecs --- .gitmodules | 9 - CMakeLists.txt | 3 - cmake/find/density.cmake | 9 - cmake/find/lizard.cmake | 9 - cmake/find/lzsse.cmake | 14 -- contrib/CMakeLists.txt | 12 -- contrib/density | 1 - contrib/density-cmake/CMakeLists.txt | 47 ----- contrib/lizard | 1 - contrib/lizard-cmake/CMakeLists.txt | 19 -- contrib/lzsse | 1 - contrib/lzsse-cmake/CMakeLists.txt | 21 -- src/CMakeLists.txt | 12 -- src/Common/config.h.in | 3 - src/Compression/CompressionCodecDensity.cpp | 122 ------------ src/Compression/CompressionCodecLZSSE.cpp | 180 ------------------ src/Compression/CompressionCodecLizard.cpp | 121 ------------ src/Compression/CompressionFactory.cpp | 21 -- src/Compression/CompressionInfo.h | 6 - src/Core/Settings.h | 2 +- src/Core/config_core.h.in | 3 - .../test_non_default_compression/test.py | 16 -- .../00096_experimental_codecs.reference | 8 - .../1_stateful/00096_experimental_codecs.sql | 61 ------ 24 files changed, 1 insertion(+), 700 deletions(-) delete mode 100644 cmake/find/density.cmake delete mode 100644 cmake/find/lizard.cmake delete mode 100644 cmake/find/lzsse.cmake delete mode 160000 contrib/density delete mode 100644 contrib/density-cmake/CMakeLists.txt delete mode 160000 contrib/lizard delete mode 100644 contrib/lizard-cmake/CMakeLists.txt delete mode 160000 contrib/lzsse delete mode 100644 contrib/lzsse-cmake/CMakeLists.txt delete mode 100644 src/Compression/CompressionCodecDensity.cpp delete mode 100644 src/Compression/CompressionCodecLZSSE.cpp delete mode 100644 src/Compression/CompressionCodecLizard.cpp delete mode 100644 tests/queries/1_stateful/00096_experimental_codecs.reference delete mode 100644 tests/queries/1_stateful/00096_experimental_codecs.sql diff --git a/.gitmodules b/.gitmodules index e59fbd2ca7e..ab7c8a7c94d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -228,15 +228,6 @@ [submodule "contrib/datasketches-cpp"] path = contrib/datasketches-cpp url = https://github.com/ClickHouse-Extras/datasketches-cpp.git -[submodule "contrib/lizard"] - path = contrib/lizard - url = https://github.com/inikep/lizard -[submodule "contrib/density"] - path = contrib/density - url = https://github.com/centaurean/density.git -[submodule "contrib/lzsse"] - path = contrib/lzsse - url = https://github.com/ConorStokes/LZSSE.git [submodule "contrib/yaml-cpp"] path = contrib/yaml-cpp url = https://github.com/ClickHouse-Extras/yaml-cpp.git diff --git a/CMakeLists.txt b/CMakeLists.txt index fd98ef95b67..ce0f58e2521 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -527,9 +527,6 @@ include (cmake/find/rocksdb.cmake) include (cmake/find/libpqxx.cmake) include (cmake/find/nuraft.cmake) include (cmake/find/yaml-cpp.cmake) -include (cmake/find/lizard.cmake) -include (cmake/find/lzsse.cmake) -include (cmake/find/density.cmake) if(NOT USE_INTERNAL_PARQUET_LIBRARY) set (ENABLE_ORC OFF CACHE INTERNAL "") diff --git a/cmake/find/density.cmake b/cmake/find/density.cmake deleted file mode 100644 index f4963d67709..00000000000 --- a/cmake/find/density.cmake +++ /dev/null @@ -1,9 +0,0 @@ -option(USE_DENSITY "Enable DENSITY experimental compression library" ${ENABLE_LIBRARIES}) - -if (NOT USE_DENSITY) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/density/README.md") - message (ERROR "submodule contrib/density is missing. to fix try run: \n git submodule update --init --recursive") -endif() diff --git a/cmake/find/lizard.cmake b/cmake/find/lizard.cmake deleted file mode 100644 index 27c8945c45a..00000000000 --- a/cmake/find/lizard.cmake +++ /dev/null @@ -1,9 +0,0 @@ -option(USE_LIZARD "Enable Lizard experimental compression library" ${ENABLE_LIBRARIES}) - -if (NOT USE_LIZARD) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lizard/README.md") - message (ERROR "submodule contrib/lizard is missing. to fix try run: \n git submodule update --init --recursive") -endif() diff --git a/cmake/find/lzsse.cmake b/cmake/find/lzsse.cmake deleted file mode 100644 index 763483d8ea9..00000000000 --- a/cmake/find/lzsse.cmake +++ /dev/null @@ -1,14 +0,0 @@ -set (DEFAULT_USE_LZSSE 0) -if (ENABLE_LIBRARIES AND ARCH_AMD64) - set (DEFAULT_USE_LZSSE 1) -endif() - -option(USE_LZSSE "Enable LZSSE experimental compression library" ${DEFAULT_USE_LZSSE}) - -if (NOT USE_LZSSE) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lzsse/README.md") - message (ERROR "submodule contrib/lzsse is missing. to fix try run: \n git submodule update --init --recursive") -endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index eccd1dc1430..710c8c7fca5 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -58,18 +58,6 @@ if (USE_INTERNAL_XZ_LIBRARY) add_subdirectory (xz) endif() -if (USE_LIZARD) - add_subdirectory (lizard-cmake) -endif () - -if (USE_DENSITY) - add_subdirectory (density-cmake) -endif () - -if (USE_LZSSE) - add_subdirectory (lzsse-cmake) -endif () - add_subdirectory (poco-cmake) add_subdirectory (croaring-cmake) diff --git a/contrib/density b/contrib/density deleted file mode 160000 index 67bd584bd74..00000000000 --- a/contrib/density +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 67bd584bd7414d4bc1418e5b7fcf9d68e44c3f28 diff --git a/contrib/density-cmake/CMakeLists.txt b/contrib/density-cmake/CMakeLists.txt deleted file mode 100644 index 844459c4105..00000000000 --- a/contrib/density-cmake/CMakeLists.txt +++ /dev/null @@ -1,47 +0,0 @@ -SET (USE_INTERNAL_DENSITY_LIBRARY 1) -SET (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/density") - -# cd contrib/density -# find . -name '*.c' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./ ${LIBRARY_DIR}/' -SET (Sources - ${LIBRARY_DIR}/src/algorithms/algorithms.c - ${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_decode.c - ${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_encode.c - ${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_decode.c - ${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_encode.c - ${LIBRARY_DIR}/src/algorithms/dictionaries.c - ${LIBRARY_DIR}/src/algorithms/lion/core/lion_decode.c - ${LIBRARY_DIR}/src/algorithms/lion/core/lion_encode.c - ${LIBRARY_DIR}/src/algorithms/lion/forms/lion_form_model.c - ${LIBRARY_DIR}/src/buffers/buffer.c - ${LIBRARY_DIR}/src/globals.c - ${LIBRARY_DIR}/src/structure/header.c -) - -# cd contrib/density -# find . -name '*.h' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./ ${LIBRARY_DIR}/' -SET (Headers - ${LIBRARY_DIR}/src/algorithms/algorithms.h - ${LIBRARY_DIR}/src/algorithms/chameleon/chameleon.h - ${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_decode.h - ${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_encode.h - ${LIBRARY_DIR}/src/algorithms/chameleon/dictionary/chameleon_dictionary.h - ${LIBRARY_DIR}/src/algorithms/cheetah/cheetah.h - ${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_decode.h - ${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_encode.h - ${LIBRARY_DIR}/src/algorithms/cheetah/dictionary/cheetah_dictionary.h - ${LIBRARY_DIR}/src/algorithms/dictionaries.h - ${LIBRARY_DIR}/src/algorithms/lion/core/lion_decode.h - ${LIBRARY_DIR}/src/algorithms/lion/core/lion_encode.h - ${LIBRARY_DIR}/src/algorithms/lion/dictionary/lion_dictionary.h - ${LIBRARY_DIR}/src/algorithms/lion/forms/lion_form_model.h - ${LIBRARY_DIR}/src/algorithms/lion/lion.h - ${LIBRARY_DIR}/src/buffers/buffer.h - ${LIBRARY_DIR}/src/density_api.h - ${LIBRARY_DIR}/src/globals.h - ${LIBRARY_DIR}/src/structure/header.h -) - -ADD_LIBRARY(density ${Sources} ${Headers}) - -target_include_directories (density PUBLIC ${LIBRARY_DIR}) diff --git a/contrib/lizard b/contrib/lizard deleted file mode 160000 index af8518ccb8c..00000000000 --- a/contrib/lizard +++ /dev/null @@ -1 +0,0 @@ -Subproject commit af8518ccb8c68e062a8c80205ff07d56a2e77dd4 diff --git a/contrib/lizard-cmake/CMakeLists.txt b/contrib/lizard-cmake/CMakeLists.txt deleted file mode 100644 index 7fe5274ecb6..00000000000 --- a/contrib/lizard-cmake/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -SET (USE_INTERNAL_LIZARD_LIBRARY 1) -SET (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lizard") - -SET (Sources - "${LIBRARY_DIR}/lib/lizard_decompress.c" - "${LIBRARY_DIR}/lib/lizard_compress.c" -) - -SET (Headers - "${LIBRARY_DIR}/lib/lizard_compress.h" - "${LIBRARY_DIR}/lib/lizard_common.h" -) - -ADD_LIBRARY(lizard ${Sources} ${Headers}) - -target_include_directories (lizard PUBLIC ${LIBRARY_DIR}) - -# It is using some symbols (HUF_* and FSE_*) from zstd. -target_link_libraries (lizard PRIVATE zstd) diff --git a/contrib/lzsse b/contrib/lzsse deleted file mode 160000 index 1847c3e8279..00000000000 --- a/contrib/lzsse +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1847c3e82794400deb56edd30d8aa3f445fd000b diff --git a/contrib/lzsse-cmake/CMakeLists.txt b/contrib/lzsse-cmake/CMakeLists.txt deleted file mode 100644 index 5ec5b312009..00000000000 --- a/contrib/lzsse-cmake/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -SET (USE_INTERNAL_LZSSE_LIBRARY 1) -SET (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lzsse") - -SET (Sources - "${LIBRARY_DIR}/lzsse2/lzsse2.cpp" - "${LIBRARY_DIR}/lzsse4/lzsse4.cpp" - "${LIBRARY_DIR}/lzsse8/lzsse8.cpp" -) - -SET (Headers - "${LIBRARY_DIR}/lzsse2/lzsse2.h" - "${LIBRARY_DIR}/lzsse4/lzsse4.h" - "${LIBRARY_DIR}/lzsse8/lzsse8.h" - "${LIBRARY_DIR}/lzsse2/lzsse2_platform.h" - "${LIBRARY_DIR}/lzsse4/lzsse4_platform.h" - "${LIBRARY_DIR}/lzsse8/lzsse8_platform.h" -) - -ADD_LIBRARY(lzsse ${Sources} ${Headers}) - -target_include_directories (lzsse SYSTEM PUBLIC ${LIBRARY_DIR}) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index af5ea3772ae..26a68fcbd14 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -380,18 +380,6 @@ if (XZ_LIBRARY) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${XZ_INCLUDE_DIR}) endif() -if (USE_LIZARD) - dbms_target_link_libraries(PRIVATE lizard) -endif() - -if (USE_DENSITY) - dbms_target_link_libraries(PRIVATE density) -endif() - -if (USE_LZSSE) - dbms_target_link_libraries(PRIVATE lzsse) -endif() - if (USE_ICU) dbms_target_link_libraries (PRIVATE ${ICU_LIBRARIES}) dbms_target_include_directories (SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 7396e6f6222..df27a7b7d9e 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -16,7 +16,4 @@ #cmakedefine01 USE_STATS #cmakedefine01 USE_DATASKETCHES #cmakedefine01 USE_YAML_CPP -#cmakedefine01 USE_LIZARD -#cmakedefine01 USE_DENSITY -#cmakedefine01 USE_LZSSE #cmakedefine01 CLICKHOUSE_SPLIT_BINARY diff --git a/src/Compression/CompressionCodecDensity.cpp b/src/Compression/CompressionCodecDensity.cpp deleted file mode 100644 index e462ff2765b..00000000000 --- a/src/Compression/CompressionCodecDensity.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -#endif - -#if USE_DENSITY - -#include -#include -#include -#include - -#include - - -namespace DB -{ - -class CompressionCodecDensity : public ICompressionCodec -{ -public: - explicit CompressionCodecDensity(DENSITY_ALGORITHM algo_); - - uint8_t getMethodByte() const override; - - UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; - - void updateHash(SipHash & hash) const override; - -protected: - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - - bool isCompression() const override { return true; } - bool isGenericCompression() const override { return true; } - bool isExperimental() const override { return true; } - -private: - const DENSITY_ALGORITHM algo; -}; - - -namespace ErrorCodes -{ - extern const int CANNOT_COMPRESS; - extern const int CANNOT_DECOMPRESS; - extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; - extern const int ILLEGAL_CODEC_PARAMETER; -} - -CompressionCodecDensity::CompressionCodecDensity(DENSITY_ALGORITHM algo_) : algo(algo_) -{ - setCodecDescription("Density", {std::make_shared(static_cast(algo))}); -} - -uint8_t CompressionCodecDensity::getMethodByte() const -{ - return static_cast(CompressionMethodByte::Density); -} - -void CompressionCodecDensity::updateHash(SipHash & hash) const -{ - getCodecDesc()->updateTreeHash(hash); -} - -UInt32 CompressionCodecDensity::getMaxCompressedDataSize(UInt32 uncompressed_size) const -{ - return density_compress_safe_size(uncompressed_size); -} - -UInt32 CompressionCodecDensity::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ - density_processing_result res = density_compress(reinterpret_cast(source), source_size, reinterpret_cast(dest), density_compress_safe_size(source_size), algo); - if (res.state != DENSITY_STATE_OK) - throw Exception("Cannot compress block with Density", ErrorCodes::CANNOT_COMPRESS); - return res.bytesWritten; -} - -void CompressionCodecDensity::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const -{ - density_processing_result res = density_decompress(reinterpret_cast(source), source_size, reinterpret_cast(dest), density_decompress_safe_size(uncompressed_size)); - if (res.state != DENSITY_STATE_OK) - throw Exception("Cannot decompress block with Density", ErrorCodes::CANNOT_DECOMPRESS); -} - -void registerCodecDensity(CompressionCodecFactory & factory) -{ - UInt8 method_code = UInt8(CompressionMethodByte::Density); - factory.registerCompressionCodec( - "Density", - method_code, - [&](const ASTPtr & arguments) -> CompressionCodecPtr - { - DENSITY_ALGORITHM algorithm = DENSITY_ALGORITHM_CHAMELEON; - - if (arguments && !arguments->children.empty()) - { - if (arguments->children.size() != 1) - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, - "Density codec must have only one parameter, given {}", arguments->children.size()); - - const auto children = arguments->children; - - const auto * algo_literal = children[0]->as(); - if (!algo_literal || algo_literal->value.getType() != Field::Types::UInt64) - throw Exception("Density codec argument must be integer", - ErrorCodes::ILLEGAL_CODEC_PARAMETER); - - const UInt64 algorithm_num = algo_literal->value.safeGet(); - if (algorithm_num < 1 || algorithm_num > 3) - throw Exception("Density codec level can be 1, 2 or 3.", ErrorCodes::ILLEGAL_CODEC_PARAMETER); - - algorithm = static_cast(algorithm_num); - } - - return std::make_shared(algorithm); - }); -} - -} - -#endif diff --git a/src/Compression/CompressionCodecLZSSE.cpp b/src/Compression/CompressionCodecLZSSE.cpp deleted file mode 100644 index 62fb641bc12..00000000000 --- a/src/Compression/CompressionCodecLZSSE.cpp +++ /dev/null @@ -1,180 +0,0 @@ -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -#endif - -#if USE_LZSSE - -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ - -class CompressionCodecLZSSE : public ICompressionCodec -{ -public: - explicit CompressionCodecLZSSE(UInt32 type_, UInt32 level_); - - uint8_t getMethodByte() const override; - UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; - void updateHash(SipHash & hash) const override; - -protected: - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - bool isCompression() const override { return true; } - bool isGenericCompression() const override { return true; } - bool isExperimental() const override { return true; } - -private: - const UInt32 type; - const UInt32 level; -}; - - -namespace ErrorCodes -{ - extern const int CANNOT_COMPRESS; - extern const int CANNOT_DECOMPRESS; - extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; - extern const int ILLEGAL_CODEC_PARAMETER; - extern const int LOGICAL_ERROR; -} - -CompressionCodecLZSSE::CompressionCodecLZSSE(UInt32 type_, UInt32 level_) : type(type_), level(level_) -{ - if (type != 2 && type != 4 && type != 8) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no LZSSE{} codec", type); - - setCodecDescription(fmt::format("LZSSE{}", type), {std::make_shared(static_cast(level))}); -} - -uint8_t CompressionCodecLZSSE::getMethodByte() const -{ - switch (type) - { - case 2: return static_cast(CompressionMethodByte::LZSSE2); - case 4: return static_cast(CompressionMethodByte::LZSSE4); - case 8: return static_cast(CompressionMethodByte::LZSSE8); - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no LZSSE{} codec", type); - } -} - -void CompressionCodecLZSSE::updateHash(SipHash & hash) const -{ - getCodecDesc()->updateTreeHash(hash); -} - -UInt32 CompressionCodecLZSSE::getMaxCompressedDataSize(UInt32 uncompressed_size) const -{ - return uncompressed_size; -} - -UInt32 CompressionCodecLZSSE::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ - UInt32 res = 0; - switch (type) - { - case 2: - { - LZSSE2_OptimalParseState * state = LZSSE2_MakeOptimalParseState(source_size); - res = LZSSE2_CompressOptimalParse(state, source, source_size, dest, source_size, level); - LZSSE2_FreeOptimalParseState(state); - break; - } - case 4: - { - LZSSE4_OptimalParseState * state = LZSSE4_MakeOptimalParseState(source_size); - res = LZSSE4_CompressOptimalParse(state, source, source_size, dest, source_size, level); - LZSSE4_FreeOptimalParseState(state); - break; - } - case 8: - { - LZSSE8_OptimalParseState * state = LZSSE8_MakeOptimalParseState(source_size); - res = LZSSE8_CompressOptimalParse(state, source, source_size, dest, source_size, level); - LZSSE8_FreeOptimalParseState(state); - break; - } - default: - break; - } - - if (res == 0) - throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress block with LZSSE{}", type); - return res; -} - -void CompressionCodecLZSSE::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const -{ - UInt32 res = 0; - switch (type) - { - case 2: - { - res = LZSSE2_Decompress(source, source_size, dest, uncompressed_size); - break; - } - case 4: - { - res = LZSSE4_Decompress(source, source_size, dest, uncompressed_size); - break; - } - case 8: - { - res = LZSSE8_Decompress(source, source_size, dest, uncompressed_size); - break; - } - default: - break; - } - if (res < uncompressed_size) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress block with LZSSE{}", type); -} - -void registerCodecsLZSSE(CompressionCodecFactory & factory) -{ - for (auto [type, method_byte] : std::initializer_list> - { - {2, CompressionMethodByte::LZSSE2}, - {4, CompressionMethodByte::LZSSE4}, - {8, CompressionMethodByte::LZSSE8} - }) - { - factory.registerCompressionCodec( - fmt::format("LZSSE{}", type), - uint8_t(method_byte), - [type = type](const ASTPtr & arguments) -> CompressionCodecPtr - { - int level = 1; - if (arguments && !arguments->children.empty()) - { - if (arguments->children.size() != 1) - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, - "LZSSE{} codec must have 1 parameter, {} given", type, arguments->children.size()); - - const auto children = arguments->children; - const auto * level_literal = children[0]->as(); - if (!level_literal) - throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, - "LZSSE{} first codec argument must be integer", type); - - level = level_literal->value.safeGet(); - } - - return std::make_shared(type, level); - }); - } -} - -} - -#endif diff --git a/src/Compression/CompressionCodecLizard.cpp b/src/Compression/CompressionCodecLizard.cpp deleted file mode 100644 index cd2d49a0538..00000000000 --- a/src/Compression/CompressionCodecLizard.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -#endif - -#if USE_LIZARD - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ -class CompressionCodecLizard : public ICompressionCodec -{ -public: - static constexpr auto LIZARD_DEFAULT_LEVEL = 1; - - explicit CompressionCodecLizard(int level_); - - uint8_t getMethodByte() const override; - - UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; - - void updateHash(SipHash & hash) const override; - -protected: - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - - bool isCompression() const override { return true; } - bool isGenericCompression() const override { return true; } - bool isExperimental() const override { return true; } - -private: - const int level; -}; - - -namespace ErrorCodes -{ - extern const int CANNOT_COMPRESS; - extern const int CANNOT_DECOMPRESS; - extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; - extern const int ILLEGAL_CODEC_PARAMETER; -} - -CompressionCodecLizard::CompressionCodecLizard(int level_) : level(level_) -{ - setCodecDescription("Lizard", {std::make_shared(static_cast(level))}); -} - -uint8_t CompressionCodecLizard::getMethodByte() const -{ - return static_cast(CompressionMethodByte::Lizard); -} - -void CompressionCodecLizard::updateHash(SipHash & hash) const -{ - getCodecDesc()->updateTreeHash(hash); -} - -UInt32 CompressionCodecLizard::getMaxCompressedDataSize(UInt32 uncompressed_size) const -{ - return Lizard_compressBound(uncompressed_size); -} - -UInt32 CompressionCodecLizard::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ - int res = Lizard_compress(source, dest, source_size, Lizard_compressBound(source_size), level); - - if (res == 0) - throw Exception("Cannot compress block with Lizard", ErrorCodes::CANNOT_COMPRESS); - return res; -} - -void CompressionCodecLizard::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const -{ - int res = Lizard_decompress_safe(source, dest, source_size, uncompressed_size); - - if (res < 0) - throw Exception("Cannot compress block with Lizard", ErrorCodes::CANNOT_DECOMPRESS); -} - -void registerCodecLizard(CompressionCodecFactory & factory) -{ - UInt8 method_code = UInt8(CompressionMethodByte::Lizard); - factory.registerCompressionCodec( - "Lizard", - method_code, - [&](const ASTPtr & arguments) -> CompressionCodecPtr - { - int level = CompressionCodecLizard::LIZARD_DEFAULT_LEVEL; - if (arguments && !arguments->children.empty()) - { - if (arguments->children.size() > 1) - throw Exception( - "Lizard codec must have 1 parameter, given " + std::to_string(arguments->children.size()), - ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); - - const auto children = arguments->children; - const auto * literal = children[0]->as(); - if (!literal) - throw Exception("Lizard codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); - - level = literal->value.safeGet(); - // compression level will be truncated to LIZARD_MAX_CLEVEL if it is greater and to LIZARD_MIN_CLEVEL if it is less - //if (level > 1)//ZSTD_maxCLevel()) - // throw Exception("Lizard codec can't have level more that " + toString(1/*ZSTD_maxCLevel()*/) + ", given " + toString(level), ErrorCodes::ILLEGAL_CODEC_PARAMETER); - } - - return std::make_shared(level); - }); -} - -} - -#endif diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index fbd369fdfe7..dc65713471c 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -339,18 +339,6 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory); void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecMultiple(CompressionCodecFactory & factory); -#if USE_LIZARD -void registerCodecLizard(CompressionCodecFactory & factory); -#endif - -#if USE_DENSITY -void registerCodecDensity(CompressionCodecFactory & factory); -#endif - -#if USE_LZSSE -void registerCodecsLZSSE(CompressionCodecFactory & factory); -#endif - CompressionCodecFactory::CompressionCodecFactory() { registerCodecLZ4(*this); @@ -362,15 +350,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecDoubleDelta(*this); registerCodecGorilla(*this); registerCodecMultiple(*this); -#if USE_LIZARD - registerCodecLizard(*this); -#endif -#if USE_DENSITY - registerCodecDensity(*this); -#endif -#if USE_LZSSE - registerCodecsLZSSE(*this); -#endif default_codec = get("LZ4", {}); } diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index 44bed7d3c1d..58a39bb12a4 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -43,12 +43,6 @@ enum class CompressionMethodByte : uint8_t T64 = 0x93, DoubleDelta = 0x94, Gorilla = 0x95, - Lizard = 0x96, - Density = 0x97, - LZSSE2 = 0x98, - LZSSE4 = 0x99, - LZSSE8 = 0xA0, - }; } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f4692b7e41f..08b69e52bbc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -240,7 +240,7 @@ class IColumn; M(Bool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.", 0) \ M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ - M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (LZSSE2/4/8, Lizard, Density). These codecs are provided for evaluation purposes.", 0) \ + M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(UInt64, odbc_max_field_size, 1024, "Max size of filed can be read from ODBC dictionary. Long strings are truncated.", 0) \ M(UInt64, query_profiler_real_time_period_ns, 1000000000, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, query_profiler_cpu_time_period_ns, 1000000000, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index da923ca23a1..e250e013913 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -15,6 +15,3 @@ #cmakedefine01 USE_LIBPQXX #cmakedefine01 USE_NURAFT #cmakedefine01 USE_KRB5 -#cmakedefine01 USE_LIZARD -#cmakedefine01 USE_DENSITY -#cmakedefine01 USE_LZSSE diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index 54e14132d3b..0cfffd28e12 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -138,19 +138,3 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): assert node5.query( "SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" - -def test_experimental_codecs(start_cluster): - node6.query(""" - CREATE TABLE compression_experimental_codecs ( - somedate Date CODEC(Lizard(12)), - id UInt64 CODEC(Density(3)), - data String CODEC(LZSSE4(3)) - ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; - """) - - node6.query( - "INSERT INTO compression_experimental_codecs VALUES(toDate('2018-10-12'), 100000, '{}')".format( - 'c' * 10000)) - - assert node6.query( - "SELECT max(length(data)) from compression_experimental_codecs GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" diff --git a/tests/queries/1_stateful/00096_experimental_codecs.reference b/tests/queries/1_stateful/00096_experimental_codecs.reference deleted file mode 100644 index a34182c23e5..00000000000 --- a/tests/queries/1_stateful/00096_experimental_codecs.reference +++ /dev/null @@ -1,8 +0,0 @@ -17415138241754778329 -17415138241754778329 -17415138241754778329 -17415138241754778329 -17415138241754778329 -3798944011853532000 -3798944011853532000 -3798944011853532000 diff --git a/tests/queries/1_stateful/00096_experimental_codecs.sql b/tests/queries/1_stateful/00096_experimental_codecs.sql deleted file mode 100644 index e085e55f886..00000000000 --- a/tests/queries/1_stateful/00096_experimental_codecs.sql +++ /dev/null @@ -1,61 +0,0 @@ -DROP TABLE IF EXISTS hits_experimental; - --- It's not allowed to create a table with experimental codecs unless the user turns off the safety switch. -CREATE TABLE hits_experimental (Title String CODEC(Lizard(10))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } - -SET allow_experimental_codecs = 1; - --- Lizard - -CREATE TABLE hits_experimental (Title String CODEC(Lizard(10))) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO hits_experimental SELECT Title FROM test.hits; -SELECT sum(cityHash64(*)) FROM hits_experimental; - --- It's always allowed to attach a table with experimental codecs. -DETACH TABLE hits_experimental; -SET allow_experimental_codecs = 0; -ATTACH TABLE hits_experimental; -SELECT sum(cityHash64(*)) FROM hits_experimental; -SET allow_experimental_codecs = 1; - -DROP TABLE hits_experimental; - --- Density - --- Check out of range levels of Density. -CREATE TABLE hits_experimental (Title String CODEC(Density(-1))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 } -CREATE TABLE hits_experimental (Title String CODEC(Density(0))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 } -CREATE TABLE hits_experimental (Title String CODEC(Density(4))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 } -CREATE TABLE hits_experimental (Title String CODEC(Density('hello'))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 } - -CREATE TABLE hits_experimental (Title String CODEC(Density(1))) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO hits_experimental SELECT Title FROM test.hits; -SELECT sum(cityHash64(*)) FROM hits_experimental; -DROP TABLE hits_experimental; - -CREATE TABLE hits_experimental (Title String CODEC(Density(2))) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO hits_experimental SELECT Title FROM test.hits; -SELECT sum(cityHash64(*)) FROM hits_experimental; -DROP TABLE hits_experimental; - -CREATE TABLE hits_experimental (Title String CODEC(Density(3))) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO hits_experimental SELECT Title FROM test.hits; -SELECT sum(cityHash64(*)) FROM hits_experimental; -DROP TABLE hits_experimental; - --- LZSSE - -CREATE TABLE hits_experimental (Title String CODEC(LZSSE2)) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO hits_experimental SELECT Title FROM test.hits ORDER BY Title LIMIT 100000; -SELECT sum(cityHash64(*)) FROM hits_experimental; -DROP TABLE hits_experimental; - -CREATE TABLE hits_experimental (Title String CODEC(LZSSE4)) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO hits_experimental SELECT Title FROM test.hits ORDER BY Title LIMIT 100000; -SELECT sum(cityHash64(*)) FROM hits_experimental; -DROP TABLE hits_experimental; - -CREATE TABLE hits_experimental (Title String CODEC(LZSSE8)) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO hits_experimental SELECT Title FROM test.hits ORDER BY Title LIMIT 100000; -SELECT sum(cityHash64(*)) FROM hits_experimental; -DROP TABLE hits_experimental;