mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Remove experimental compression codecs
This commit is contained in:
parent
31f02b3780
commit
8d85145725
9
.gitmodules
vendored
9
.gitmodules
vendored
@ -228,15 +228,6 @@
|
||||
[submodule "contrib/datasketches-cpp"]
|
||||
path = contrib/datasketches-cpp
|
||||
url = https://github.com/ClickHouse-Extras/datasketches-cpp.git
|
||||
[submodule "contrib/lizard"]
|
||||
path = contrib/lizard
|
||||
url = https://github.com/inikep/lizard
|
||||
[submodule "contrib/density"]
|
||||
path = contrib/density
|
||||
url = https://github.com/centaurean/density.git
|
||||
[submodule "contrib/lzsse"]
|
||||
path = contrib/lzsse
|
||||
url = https://github.com/ConorStokes/LZSSE.git
|
||||
[submodule "contrib/yaml-cpp"]
|
||||
path = contrib/yaml-cpp
|
||||
url = https://github.com/ClickHouse-Extras/yaml-cpp.git
|
||||
|
@ -527,9 +527,6 @@ include (cmake/find/rocksdb.cmake)
|
||||
include (cmake/find/libpqxx.cmake)
|
||||
include (cmake/find/nuraft.cmake)
|
||||
include (cmake/find/yaml-cpp.cmake)
|
||||
include (cmake/find/lizard.cmake)
|
||||
include (cmake/find/lzsse.cmake)
|
||||
include (cmake/find/density.cmake)
|
||||
|
||||
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
|
||||
set (ENABLE_ORC OFF CACHE INTERNAL "")
|
||||
|
@ -1,9 +0,0 @@
|
||||
# Optional Density codec. The switch defaults to ENABLE_LIBRARIES; when it is off,
# return() skips the rest of this file.
option (USE_DENSITY "Enable DENSITY experimental compression library" ${ENABLE_LIBRARIES})

if (NOT USE_DENSITY)
    return()
endif()

# The README is used as a marker that the submodule has been checked out.
# FATAL_ERROR is required here: a bare ERROR is not a message() mode keyword, so it would be
# printed as part of the text and configuration would carry on without the sources.
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/density/README.md")
    message (FATAL_ERROR "submodule contrib/density is missing. to fix try run: \n git submodule update --init --recursive")
endif()
|
@ -1,9 +0,0 @@
|
||||
# Optional Lizard codec. The switch defaults to ENABLE_LIBRARIES; when it is off,
# return() skips the rest of this file.
option (USE_LIZARD "Enable Lizard experimental compression library" ${ENABLE_LIBRARIES})

if (NOT USE_LIZARD)
    return()
endif()

# The README is used as a marker that the submodule has been checked out.
# FATAL_ERROR is required here: a bare ERROR is not a message() mode keyword, so it would be
# printed as part of the text and configuration would carry on without the sources.
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lizard/README.md")
    message (FATAL_ERROR "submodule contrib/lizard is missing. to fix try run: \n git submodule update --init --recursive")
endif()
|
@ -1,14 +0,0 @@
|
||||
# Optional LZSSE codec. It is on by default only when ENABLE_LIBRARIES is set and the
# target architecture is AMD64; otherwise the default is off.
set (DEFAULT_USE_LZSSE 0)
if (ENABLE_LIBRARIES AND ARCH_AMD64)
    set (DEFAULT_USE_LZSSE 1)
endif()

option (USE_LZSSE "Enable LZSSE experimental compression library" ${DEFAULT_USE_LZSSE})

# Nothing else to do when the codec is disabled.
if (NOT USE_LZSSE)
    return()
endif()

# The README is used as a marker that the submodule has been checked out.
# FATAL_ERROR is required here: a bare ERROR is not a message() mode keyword, so it would be
# printed as part of the text and configuration would carry on without the sources.
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lzsse/README.md")
    message (FATAL_ERROR "submodule contrib/lzsse is missing. to fix try run: \n git submodule update --init --recursive")
endif()
|
12
contrib/CMakeLists.txt
vendored
12
contrib/CMakeLists.txt
vendored
@ -58,18 +58,6 @@ if (USE_INTERNAL_XZ_LIBRARY)
|
||||
add_subdirectory (xz)
|
||||
endif()
|
||||
|
||||
# Experimental compression libraries: the wrapper directory <name>-cmake is added only
# when the matching USE_<NAME> switch is on. Order is lizard, density, lzsse.
foreach (experimental_codec LIZARD DENSITY LZSSE)
    if (USE_${experimental_codec})
        string (TOLOWER "${experimental_codec}" experimental_codec_dir)
        add_subdirectory ("${experimental_codec_dir}-cmake")
    endif ()
endforeach ()
unset (experimental_codec_dir)
|
||||
|
||||
add_subdirectory (poco-cmake)
|
||||
add_subdirectory (croaring-cmake)
|
||||
|
||||
|
1
contrib/density
vendored
1
contrib/density
vendored
@ -1 +0,0 @@
|
||||
Subproject commit 67bd584bd7414d4bc1418e5b7fcf9d68e44c3f28
|
@ -1,47 +0,0 @@
|
||||
# Build wrapper for the vendored Density library located in contrib/density.
set (USE_INTERNAL_DENSITY_LIBRARY 1)
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/density")

# C sources. The list can be regenerated from inside contrib/density with:
#   find . -name '*.c' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./    ${LIBRARY_DIR}/'
set (density_sources
    "${LIBRARY_DIR}/src/algorithms/algorithms.c"
    "${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_decode.c"
    "${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_encode.c"
    "${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_decode.c"
    "${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_encode.c"
    "${LIBRARY_DIR}/src/algorithms/dictionaries.c"
    "${LIBRARY_DIR}/src/algorithms/lion/core/lion_decode.c"
    "${LIBRARY_DIR}/src/algorithms/lion/core/lion_encode.c"
    "${LIBRARY_DIR}/src/algorithms/lion/forms/lion_form_model.c"
    "${LIBRARY_DIR}/src/buffers/buffer.c"
    "${LIBRARY_DIR}/src/globals.c"
    "${LIBRARY_DIR}/src/structure/header.c"
)

# Headers. The list can be regenerated from inside contrib/density with:
#   find . -name '*.h' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./    ${LIBRARY_DIR}/'
set (density_headers
    "${LIBRARY_DIR}/src/algorithms/algorithms.h"
    "${LIBRARY_DIR}/src/algorithms/chameleon/chameleon.h"
    "${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_decode.h"
    "${LIBRARY_DIR}/src/algorithms/chameleon/core/chameleon_encode.h"
    "${LIBRARY_DIR}/src/algorithms/chameleon/dictionary/chameleon_dictionary.h"
    "${LIBRARY_DIR}/src/algorithms/cheetah/cheetah.h"
    "${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_decode.h"
    "${LIBRARY_DIR}/src/algorithms/cheetah/core/cheetah_encode.h"
    "${LIBRARY_DIR}/src/algorithms/cheetah/dictionary/cheetah_dictionary.h"
    "${LIBRARY_DIR}/src/algorithms/dictionaries.h"
    "${LIBRARY_DIR}/src/algorithms/lion/core/lion_decode.h"
    "${LIBRARY_DIR}/src/algorithms/lion/core/lion_encode.h"
    "${LIBRARY_DIR}/src/algorithms/lion/dictionary/lion_dictionary.h"
    "${LIBRARY_DIR}/src/algorithms/lion/forms/lion_form_model.h"
    "${LIBRARY_DIR}/src/algorithms/lion/lion.h"
    "${LIBRARY_DIR}/src/buffers/buffer.h"
    "${LIBRARY_DIR}/src/density_api.h"
    "${LIBRARY_DIR}/src/globals.h"
    "${LIBRARY_DIR}/src/structure/header.h"
)

add_library (density ${density_sources} ${density_headers})

# Consumers include headers relative to the library root, e.g. <src/density_api.h>.
target_include_directories (density PUBLIC "${LIBRARY_DIR}")
|
1
contrib/lizard
vendored
1
contrib/lizard
vendored
@ -1 +0,0 @@
|
||||
Subproject commit af8518ccb8c68e062a8c80205ff07d56a2e77dd4
|
@ -1,19 +0,0 @@
|
||||
# Build wrapper for the vendored Lizard library located in contrib/lizard.
set (USE_INTERNAL_LIZARD_LIBRARY 1)
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lizard")

add_library (lizard
    # sources
    "${LIBRARY_DIR}/lib/lizard_decompress.c"
    "${LIBRARY_DIR}/lib/lizard_compress.c"
    # headers
    "${LIBRARY_DIR}/lib/lizard_compress.h"
    "${LIBRARY_DIR}/lib/lizard_common.h"
)

target_include_directories (lizard PUBLIC "${LIBRARY_DIR}")

# The HUF_* and FSE_* symbols the library relies on come from zstd.
target_link_libraries (lizard PRIVATE zstd)
|
1
contrib/lzsse
vendored
1
contrib/lzsse
vendored
@ -1 +0,0 @@
|
||||
Subproject commit 1847c3e82794400deb56edd30d8aa3f445fd000b
|
@ -1,21 +0,0 @@
|
||||
# Build wrapper for the vendored LZSSE library located in contrib/lzsse.
# The three variants (lzsse2, lzsse4, lzsse8) are compiled into a single target.
set (USE_INTERNAL_LZSSE_LIBRARY 1)
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lzsse")

add_library (lzsse
    # sources
    "${LIBRARY_DIR}/lzsse2/lzsse2.cpp"
    "${LIBRARY_DIR}/lzsse4/lzsse4.cpp"
    "${LIBRARY_DIR}/lzsse8/lzsse8.cpp"
    # headers
    "${LIBRARY_DIR}/lzsse2/lzsse2.h"
    "${LIBRARY_DIR}/lzsse4/lzsse4.h"
    "${LIBRARY_DIR}/lzsse8/lzsse8.h"
    "${LIBRARY_DIR}/lzsse2/lzsse2_platform.h"
    "${LIBRARY_DIR}/lzsse4/lzsse4_platform.h"
    "${LIBRARY_DIR}/lzsse8/lzsse8_platform.h"
)

# Exposed as a SYSTEM include directory to everything that links the target.
target_include_directories (lzsse SYSTEM PUBLIC "${LIBRARY_DIR}")
|
@ -380,18 +380,6 @@ if (XZ_LIBRARY)
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${XZ_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# Link each enabled experimental compression library privately into dbms.
# Target names are the lower-cased codec names; order is lizard, density, lzsse.
foreach (experimental_codec LIZARD DENSITY LZSSE)
    if (USE_${experimental_codec})
        string (TOLOWER "${experimental_codec}" experimental_codec_target)
        dbms_target_link_libraries (PRIVATE ${experimental_codec_target})
    endif ()
endforeach ()
unset (experimental_codec_target)
|
||||
|
||||
if (USE_ICU)
|
||||
dbms_target_link_libraries (PRIVATE ${ICU_LIBRARIES})
|
||||
dbms_target_include_directories (SYSTEM PRIVATE ${ICU_INCLUDE_DIRS})
|
||||
|
@ -16,7 +16,4 @@
|
||||
#cmakedefine01 USE_STATS
|
||||
#cmakedefine01 USE_DATASKETCHES
|
||||
#cmakedefine01 USE_YAML_CPP
|
||||
#cmakedefine01 USE_LIZARD
|
||||
#cmakedefine01 USE_DENSITY
|
||||
#cmakedefine01 USE_LZSSE
|
||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||
|
@ -1,122 +0,0 @@
|
||||
#if !defined(ARCADIA_BUILD)
#    include "config_core.h"
#endif

#if USE_DENSITY

#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionFactory.h>
#include <Compression/CompressionInfo.h>
#include <Parsers/ASTLiteral.h>

#include <src/density_api.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int CANNOT_COMPRESS;
    extern const int CANNOT_DECOMPRESS;
    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
    extern const int ILLEGAL_CODEC_PARAMETER;
}

/// Experimental generic compression codec that forwards to the Density library.
/// The single parameter is the DENSITY_ALGORITHM handed to density_compress().
class CompressionCodecDensity : public ICompressionCodec
{
public:
    explicit CompressionCodecDensity(DENSITY_ALGORITHM algo_);

    uint8_t getMethodByte() const override;

    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

    void updateHash(SipHash & hash) const override;

protected:
    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;

    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;

    bool isCompression() const override { return true; }
    bool isGenericCompression() const override { return true; }
    bool isExperimental() const override { return true; }

private:
    const DENSITY_ALGORITHM algo;
};


CompressionCodecDensity::CompressionCodecDensity(DENSITY_ALGORITHM algo_) : algo(algo_)
{
    /// The description is "Density" with the numeric algorithm as its only argument.
    setCodecDescription("Density", {std::make_shared<ASTLiteral>(static_cast<UInt64>(algo))});
}

uint8_t CompressionCodecDensity::getMethodByte() const
{
    return static_cast<uint8_t>(CompressionMethodByte::Density);
}

UInt32 CompressionCodecDensity::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
    return density_compress_safe_size(uncompressed_size);
}

void CompressionCodecDensity::updateHash(SipHash & hash) const
{
    getCodecDesc()->updateTreeHash(hash);
}

UInt32 CompressionCodecDensity::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    const auto * input = reinterpret_cast<const uint8_t *>(source);
    auto * output = reinterpret_cast<uint8_t *>(dest);

    /// The output capacity passed to the library is the same bound getMaxCompressedDataSize() reports.
    const density_processing_result result
        = density_compress(input, source_size, output, density_compress_safe_size(source_size), algo);

    if (result.state == DENSITY_STATE_OK)
        return result.bytesWritten;

    throw Exception("Cannot compress block with Density", ErrorCodes::CANNOT_COMPRESS);
}

void CompressionCodecDensity::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
    const auto * input = reinterpret_cast<const uint8_t *>(source);
    auto * output = reinterpret_cast<uint8_t *>(dest);

    const density_processing_result result
        = density_decompress(input, source_size, output, density_decompress_safe_size(uncompressed_size));

    if (result.state != DENSITY_STATE_OK)
        throw Exception("Cannot decompress block with Density", ErrorCodes::CANNOT_DECOMPRESS);
}

/// Registers the "Density" codec. Without arguments DENSITY_ALGORITHM_CHAMELEON is used;
/// otherwise exactly one unsigned integer literal in the range 1..3 is accepted.
void registerCodecDensity(CompressionCodecFactory & factory)
{
    factory.registerCompressionCodec(
        "Density",
        UInt8(CompressionMethodByte::Density),
        [](const ASTPtr & arguments) -> CompressionCodecPtr
        {
            if (!arguments || arguments->children.empty())
                return std::make_shared<CompressionCodecDensity>(DENSITY_ALGORITHM_CHAMELEON);

            const auto & params = arguments->children;
            if (params.size() != 1)
                throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE,
                    "Density codec must have only one parameter, given {}", params.size());

            const auto * literal = params[0]->as<ASTLiteral>();
            const bool is_unsigned_integer = literal && literal->value.getType() == Field::Types::UInt64;
            if (!is_unsigned_integer)
                throw Exception("Density codec argument must be integer",
                    ErrorCodes::ILLEGAL_CODEC_PARAMETER);

            const UInt64 requested = literal->value.safeGet<UInt64>();
            if (requested < 1 || requested > 3)
                throw Exception("Density codec level can be 1, 2 or 3.", ErrorCodes::ILLEGAL_CODEC_PARAMETER);

            return std::make_shared<CompressionCodecDensity>(static_cast<DENSITY_ALGORITHM>(requested));
        });
}

}

#endif
|
@ -1,180 +0,0 @@
|
||||
#if !defined(ARCADIA_BUILD)
#    include "config_core.h"
#endif

#if USE_LZSSE

#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionFactory.h>
#include <Compression/CompressionInfo.h>
#include <Parsers/ASTLiteral.h>

#include <lzsse2/lzsse2.h>
#include <lzsse4/lzsse4.h>
#include <lzsse8/lzsse8.h>


namespace DB
{

/// Experimental generic compression codec covering the three LZSSE variants.
/// `type` selects the variant (2, 4 or 8); `level` is forwarded to the
/// LZSSE*_CompressOptimalParse call.
class CompressionCodecLZSSE : public ICompressionCodec
{
public:
    explicit CompressionCodecLZSSE(UInt32 type_, UInt32 level_);

    uint8_t getMethodByte() const override;
    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
    void updateHash(SipHash & hash) const override;

protected:
    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
    bool isCompression() const override { return true; }
    bool isGenericCompression() const override { return true; }
    bool isExperimental() const override { return true; }

private:
    const UInt32 type;
    const UInt32 level;
};


namespace ErrorCodes
{
    extern const int CANNOT_COMPRESS;
    extern const int CANNOT_DECOMPRESS;
    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
    extern const int ILLEGAL_CODEC_PARAMETER;
    extern const int LOGICAL_ERROR;
}

/// Throws LOGICAL_ERROR for any variant other than 2, 4 or 8, so the switches below
/// never reach their default branch on a constructed object.
CompressionCodecLZSSE::CompressionCodecLZSSE(UInt32 type_, UInt32 level_) : type(type_), level(level_)
{
    if (type != 2 && type != 4 && type != 8)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no LZSSE{} codec", type);

    setCodecDescription(fmt::format("LZSSE{}", type), {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
}

uint8_t CompressionCodecLZSSE::getMethodByte() const
{
    switch (type)
    {
        case 2: return static_cast<uint8_t>(CompressionMethodByte::LZSSE2);
        case 4: return static_cast<uint8_t>(CompressionMethodByte::LZSSE4);
        case 8: return static_cast<uint8_t>(CompressionMethodByte::LZSSE8);
        default:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no LZSSE{} codec", type);
    }
}

void CompressionCodecLZSSE::updateHash(SipHash & hash) const
{
    getCodecDesc()->updateTreeHash(hash);
}

UInt32 CompressionCodecLZSSE::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
    /// NOTE(review): no headroom is reserved, and doCompressData() passes the same value as
    /// the output capacity. Presumably a block that does not shrink makes the library return 0,
    /// which is reported as CANNOT_COMPRESS - confirm against the LZSSE API.
    return uncompressed_size;
}

/// Compresses with the optimal-parse encoder of the selected variant.
/// A result of 0 is treated as failure and raises CANNOT_COMPRESS.
UInt32 CompressionCodecLZSSE::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    UInt32 res = 0;
    switch (type)
    {
        case 2:
        {
            LZSSE2_OptimalParseState * state = LZSSE2_MakeOptimalParseState(source_size);
            res = LZSSE2_CompressOptimalParse(state, source, source_size, dest, source_size, level);
            LZSSE2_FreeOptimalParseState(state);
            break;
        }
        case 4:
        {
            LZSSE4_OptimalParseState * state = LZSSE4_MakeOptimalParseState(source_size);
            res = LZSSE4_CompressOptimalParse(state, source, source_size, dest, source_size, level);
            LZSSE4_FreeOptimalParseState(state);
            break;
        }
        case 8:
        {
            LZSSE8_OptimalParseState * state = LZSSE8_MakeOptimalParseState(source_size);
            res = LZSSE8_CompressOptimalParse(state, source, source_size, dest, source_size, level);
            LZSSE8_FreeOptimalParseState(state);
            break;
        }
        default:
            break;
    }

    if (res == 0)
        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress block with LZSSE{}", type);
    return res;
}

/// Decompresses with the selected variant; fewer than `uncompressed_size` produced bytes
/// raises CANNOT_DECOMPRESS.
void CompressionCodecLZSSE::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
    UInt32 res = 0;
    switch (type)
    {
        case 2:
        {
            res = LZSSE2_Decompress(source, source_size, dest, uncompressed_size);
            break;
        }
        case 4:
        {
            res = LZSSE4_Decompress(source, source_size, dest, uncompressed_size);
            break;
        }
        case 8:
        {
            res = LZSSE8_Decompress(source, source_size, dest, uncompressed_size);
            break;
        }
        default:
            break;
    }
    if (res < uncompressed_size)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress block with LZSSE{}", type);
}

/// Registers LZSSE2, LZSSE4 and LZSSE8. Each accepts an optional single unsigned integer
/// literal as the level; the level is 1 when no argument is given.
void registerCodecsLZSSE(CompressionCodecFactory & factory)
{
    for (auto [type, method_byte] : std::initializer_list<std::tuple<int, CompressionMethodByte>>
    {
        {2, CompressionMethodByte::LZSSE2},
        {4, CompressionMethodByte::LZSSE4},
        {8, CompressionMethodByte::LZSSE8}
    })
    {
        factory.registerCompressionCodec(
            fmt::format("LZSSE{}", type),
            uint8_t(method_byte),
            /// A structured binding cannot be captured directly, hence the init-capture.
            [type = type](const ASTPtr & arguments) -> CompressionCodecPtr
            {
                UInt32 level = 1;
                if (arguments && !arguments->children.empty())
                {
                    if (arguments->children.size() != 1)
                        throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE,
                            "LZSSE{} codec must have 1 parameter, {} given", type, arguments->children.size());

                    const auto & children = arguments->children;
                    const auto * level_literal = children[0]->as<ASTLiteral>();

                    /// Check the literal's type before extracting it, so that a string or negative
                    /// argument is reported as an illegal codec parameter (as the Density codec does)
                    /// instead of surfacing whatever safeGet() throws on a type mismatch.
                    if (!level_literal || level_literal->value.getType() != Field::Types::UInt64)
                        throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER,
                            "LZSSE{} first codec argument must be integer", type);

                    level = static_cast<UInt32>(level_literal->value.safeGet<UInt64>());
                }

                return std::make_shared<CompressionCodecLZSSE>(type, level);
            });
    }
}

}

#endif
|
@ -1,121 +0,0 @@
|
||||
#if !defined(ARCADIA_BUILD)
#    include "config_core.h"
#endif

#if USE_LIZARD

#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionFactory.h>
#include <Compression/CompressionInfo.h>
#include <Parsers/ASTLiteral.h>
#include <lib/lizard_compress.h>
#include <lib/lizard_decompress.h>


namespace DB
{

/// Experimental generic compression codec that forwards to the Lizard library.
/// The single parameter is the compression level passed to Lizard_compress().
class CompressionCodecLizard : public ICompressionCodec
{
public:
    /// Level used when the codec is specified without an argument.
    static constexpr auto LIZARD_DEFAULT_LEVEL = 1;

    explicit CompressionCodecLizard(int level_);

    uint8_t getMethodByte() const override;

    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

    void updateHash(SipHash & hash) const override;

protected:
    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;

    bool isCompression() const override { return true; }
    bool isGenericCompression() const override { return true; }
    bool isExperimental() const override { return true; }

private:
    const int level;
};


namespace ErrorCodes
{
    extern const int CANNOT_COMPRESS;
    extern const int CANNOT_DECOMPRESS;
    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
    extern const int ILLEGAL_CODEC_PARAMETER;
}

CompressionCodecLizard::CompressionCodecLizard(int level_) : level(level_)
{
    setCodecDescription("Lizard", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
}

uint8_t CompressionCodecLizard::getMethodByte() const
{
    return static_cast<uint8_t>(CompressionMethodByte::Lizard);
}

void CompressionCodecLizard::updateHash(SipHash & hash) const
{
    getCodecDesc()->updateTreeHash(hash);
}

UInt32 CompressionCodecLizard::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
    return Lizard_compressBound(uncompressed_size);
}

/// Compresses `source` into `dest`; a zero result from the library raises CANNOT_COMPRESS.
UInt32 CompressionCodecLizard::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    int res = Lizard_compress(source, dest, source_size, Lizard_compressBound(source_size), level);

    if (res == 0)
        throw Exception("Cannot compress block with Lizard", ErrorCodes::CANNOT_COMPRESS);
    return res;
}

/// Decompresses `source` into `dest`; a negative result from the library raises CANNOT_DECOMPRESS.
void CompressionCodecLizard::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
    int res = Lizard_decompress_safe(source, dest, source_size, uncompressed_size);

    /// The message names the operation that actually failed (it used to say "compress").
    if (res < 0)
        throw Exception("Cannot decompress block with Lizard", ErrorCodes::CANNOT_DECOMPRESS);
}

/// Registers the "Lizard" codec. It accepts an optional single unsigned integer literal
/// as the level and falls back to LIZARD_DEFAULT_LEVEL when no argument is given.
void registerCodecLizard(CompressionCodecFactory & factory)
{
    UInt8 method_code = UInt8(CompressionMethodByte::Lizard);
    factory.registerCompressionCodec(
        "Lizard",
        method_code,
        [](const ASTPtr & arguments) -> CompressionCodecPtr
        {
            int level = CompressionCodecLizard::LIZARD_DEFAULT_LEVEL;
            if (arguments && !arguments->children.empty())
            {
                if (arguments->children.size() > 1)
                    throw Exception(
                        "Lizard codec must have 1 parameter, given " + std::to_string(arguments->children.size()),
                        ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);

                const auto & children = arguments->children;
                const auto * literal = children[0]->as<ASTLiteral>();

                /// Check the literal's type before extracting it, so that a string or negative
                /// argument is reported as an illegal codec parameter (as the Density codec does)
                /// instead of surfacing whatever safeGet() throws on a type mismatch.
                if (!literal || literal->value.getType() != Field::Types::UInt64)
                    throw Exception("Lizard codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);

                /// No range check is done here.
                /// NOTE(review): the note previously left at this spot says the library truncates the
                /// level to LIZARD_MAX_CLEVEL / LIZARD_MIN_CLEVEL when it is out of range - confirm.
                level = literal->value.safeGet<UInt64>();
            }

            return std::make_shared<CompressionCodecLizard>(level);
        });
}

}

#endif
|
@ -339,18 +339,6 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory);
|
||||
void registerCodecMultiple(CompressionCodecFactory & factory);
|
||||
|
||||
#if USE_LIZARD
|
||||
void registerCodecLizard(CompressionCodecFactory & factory);
|
||||
#endif
|
||||
|
||||
#if USE_DENSITY
|
||||
void registerCodecDensity(CompressionCodecFactory & factory);
|
||||
#endif
|
||||
|
||||
#if USE_LZSSE
|
||||
void registerCodecsLZSSE(CompressionCodecFactory & factory);
|
||||
#endif
|
||||
|
||||
CompressionCodecFactory::CompressionCodecFactory()
|
||||
{
|
||||
registerCodecLZ4(*this);
|
||||
@ -362,15 +350,6 @@ CompressionCodecFactory::CompressionCodecFactory()
|
||||
registerCodecDoubleDelta(*this);
|
||||
registerCodecGorilla(*this);
|
||||
registerCodecMultiple(*this);
|
||||
#if USE_LIZARD
|
||||
registerCodecLizard(*this);
|
||||
#endif
|
||||
#if USE_DENSITY
|
||||
registerCodecDensity(*this);
|
||||
#endif
|
||||
#if USE_LZSSE
|
||||
registerCodecsLZSSE(*this);
|
||||
#endif
|
||||
|
||||
default_codec = get("LZ4", {});
|
||||
}
|
||||
|
@ -43,12 +43,6 @@ enum class CompressionMethodByte : uint8_t
|
||||
T64 = 0x93,
|
||||
DoubleDelta = 0x94,
|
||||
Gorilla = 0x95,
|
||||
Lizard = 0x96,
|
||||
Density = 0x97,
|
||||
LZSSE2 = 0x98,
|
||||
LZSSE4 = 0x99,
|
||||
LZSSE8 = 0xA0,
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -240,7 +240,7 @@ class IColumn;
|
||||
M(Bool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.", 0) \
|
||||
M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \
|
||||
M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \
|
||||
M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (LZSSE2/4/8, Lizard, Density). These codecs are provided for evaluation purposes.", 0) \
|
||||
M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \
|
||||
M(UInt64, odbc_max_field_size, 1024, "Max size of filed can be read from ODBC dictionary. Long strings are truncated.", 0) \
|
||||
M(UInt64, query_profiler_real_time_period_ns, 1000000000, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
|
||||
M(UInt64, query_profiler_cpu_time_period_ns, 1000000000, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
|
||||
|
@ -15,6 +15,3 @@
|
||||
#cmakedefine01 USE_LIBPQXX
|
||||
#cmakedefine01 USE_NURAFT
|
||||
#cmakedefine01 USE_KRB5
|
||||
#cmakedefine01 USE_LIZARD
|
||||
#cmakedefine01 USE_DENSITY
|
||||
#cmakedefine01 USE_LZSSE
|
||||
|
@ -138,19 +138,3 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster):
|
||||
|
||||
assert node5.query(
|
||||
"SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n"
|
||||
|
||||
def test_experimental_codecs(start_cluster):
    """Store one row in a table whose columns use Lizard, Density and LZSSE4 and read it back.

    The string column holds 10000 characters; the final query checks that the longest
    stored value still has that length.
    """
    create_table = """
    CREATE TABLE compression_experimental_codecs (
        somedate Date CODEC(Lizard(12)),
        id UInt64 CODEC(Density(3)),
        data String CODEC(LZSSE4(3))
    ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2;
    """
    node6.query(create_table)

    payload = 'c' * 10000
    insert_row = "INSERT INTO compression_experimental_codecs VALUES(toDate('2018-10-12'), 100000, '{}')".format(
        payload)
    node6.query(insert_row)

    longest = node6.query(
        "SELECT max(length(data)) from compression_experimental_codecs GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1")
    assert longest == "10000\n"
|
||||
|
@ -1,8 +0,0 @@
|
||||
17415138241754778329
|
||||
17415138241754778329
|
||||
17415138241754778329
|
||||
17415138241754778329
|
||||
17415138241754778329
|
||||
3798944011853532000
|
||||
3798944011853532000
|
||||
3798944011853532000
|
@ -1,61 +0,0 @@
|
||||
DROP TABLE IF EXISTS hits_experimental;

-- Experimental codecs are rejected in CREATE TABLE while allow_experimental_codecs is off.
CREATE TABLE hits_experimental (Title String CODEC(Lizard(10))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }

SET allow_experimental_codecs = 1;

-- ---------- Lizard ----------

CREATE TABLE hits_experimental (Title String CODEC(Lizard(10))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO hits_experimental SELECT Title FROM test.hits;
SELECT sum(cityHash64(*)) FROM hits_experimental;

-- ATTACH of an existing table with an experimental codec works even with the setting turned off.
DETACH TABLE hits_experimental;
SET allow_experimental_codecs = 0;
ATTACH TABLE hits_experimental;
SELECT sum(cityHash64(*)) FROM hits_experimental;
SET allow_experimental_codecs = 1;

DROP TABLE hits_experimental;

-- ---------- Density ----------

-- Arguments outside 1..3, or of a non-integer type, must be refused.
CREATE TABLE hits_experimental (Title String CODEC(Density(-1))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 }
CREATE TABLE hits_experimental (Title String CODEC(Density(0))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 }
CREATE TABLE hits_experimental (Title String CODEC(Density(4))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 }
CREATE TABLE hits_experimental (Title String CODEC(Density('hello'))) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 433 }

-- Each accepted Density argument (1, 2, 3) must round-trip the data to the same checksum.
CREATE TABLE hits_experimental (Title String CODEC(Density(1))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO hits_experimental SELECT Title FROM test.hits;
SELECT sum(cityHash64(*)) FROM hits_experimental;
DROP TABLE hits_experimental;

CREATE TABLE hits_experimental (Title String CODEC(Density(2))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO hits_experimental SELECT Title FROM test.hits;
SELECT sum(cityHash64(*)) FROM hits_experimental;
DROP TABLE hits_experimental;

CREATE TABLE hits_experimental (Title String CODEC(Density(3))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO hits_experimental SELECT Title FROM test.hits;
SELECT sum(cityHash64(*)) FROM hits_experimental;
DROP TABLE hits_experimental;

-- ---------- LZSSE ----------

-- The LZSSE variants are exercised on a sorted 100000-row subset of the titles.
CREATE TABLE hits_experimental (Title String CODEC(LZSSE2)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO hits_experimental SELECT Title FROM test.hits ORDER BY Title LIMIT 100000;
SELECT sum(cityHash64(*)) FROM hits_experimental;
DROP TABLE hits_experimental;

CREATE TABLE hits_experimental (Title String CODEC(LZSSE4)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO hits_experimental SELECT Title FROM test.hits ORDER BY Title LIMIT 100000;
SELECT sum(cityHash64(*)) FROM hits_experimental;
DROP TABLE hits_experimental;

CREATE TABLE hits_experimental (Title String CODEC(LZSSE8)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO hits_experimental SELECT Title FROM test.hits ORDER BY Title LIMIT 100000;
SELECT sum(cityHash64(*)) FROM hits_experimental;
DROP TABLE hits_experimental;
|
Loading…
Reference in New Issue
Block a user