*added adapters' boilerplate for Lzma buffers, *added submodule to gitmodules, *added cmake for xz

This commit is contained in:
a.palagashvili 2020-11-01 02:56:41 +03:00
parent a78b04b56a
commit c1abf5d13c
9 changed files with 379 additions and 2 deletions

6
.gitmodules vendored
View File

@ -186,3 +186,9 @@
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
# NOTE(review): the two entries below use the same upstream URL, checked out at
# two different paths. Confirm that both checkouts are really needed.
[submodule "contrib/xz-mirror"]
path = contrib/xz-mirror
url = https://github.com/xz-mirror/xz
[submodule "contrib/xz"]
path = contrib/xz
url = https://github.com/xz-mirror/xz

View File

@ -32,6 +32,7 @@ add_subdirectory (murmurhash)
add_subdirectory (replxx-cmake)
add_subdirectory (ryu-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (xz-cmake)
add_subdirectory (poco-cmake)

View File

@ -0,0 +1,246 @@
#############################################################################
#
# Very limited CMake support for building some parts of XZ Utils
#
# For now, this is intended to be useful to build static or shared liblzma
# on Windows with MSVC (to avoid the need to maintain Visual Studio project
# files). Building liblzma on a few other platforms should work too but it
# is somewhat experimental and not as portable as using ./configure.
#
# On some platforms this builds also xz and xzdec, but these are
# highly experimental and meant for testing only:
# - No large file support on those 32-bit platforms that need it
# - No replacement getopt_long(), libc must have it
# - No sandboxing support
# - No translations
# - No xz symlinks are installed
#
# Other missing things:
# - No xzgrep or other scripts or their symlinks
# - No tests (no test failures either!)
#
# NOTE: Even if the code compiles without warnings, the end result may be
# different than via ./configure. Specifically, the list of #defines
# may be different (if so, probably this CMakeLists.txt got them wrong).
#
# This file provides the following installation components (if you only
# need liblzma, install only its components!):
# - liblzma_Runtime
# - liblzma_Development
# - xz (on some platforms only)
# - xzdec (on some platforms only)
#
# To find the target liblzma::liblzma from other packages, use the CONFIG
# option with find_package() to avoid a conflict with the FindLibLZMA module
# with case-insensitive file systems. For example, to require liblzma 5.2.5
# or a newer compatible version:
#
# find_package(liblzma 5.2.5 REQUIRED CONFIG)
# target_link_libraries(my_application liblzma::liblzma)
#
#############################################################################
#
# Author: Lasse Collin
#
# This file has been put into the public domain.
# You can do whatever you want with this file.
#
#############################################################################
# Root of the xz sources inside the contrib submodule; every source and header
# path listed in this file is relative to it.
SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/xz/src)
# Load the whole of version.h into XZ_VERSION ...
file(READ ${LIBRARY_DIR}/liblzma/api/lzma/version.h XZ_VERSION)
# ... then collapse it to "MAJOR.MINOR.PATCH" by capturing the three
# consecutive LZMA_VERSION_* #define lines. The pattern expects those lines to
# be adjacent and in this order.
string(REGEX REPLACE
"^.*\n\
#define LZMA_VERSION_MAJOR ([0-9]+)\n\
#define LZMA_VERSION_MINOR ([0-9]+)\n\
#define LZMA_VERSION_PATCH ([0-9]+)\n\
.*$"
"\\1.\\2.\\3" XZ_VERSION "${XZ_VERSION}")
# Report the detected version in the configure log
MESSAGE(STATUS "LZMA VERSION ${XZ_VERSION}")
# Sources that make up liblzma itself.
#
# The list was seeded with:
#   cd contrib/xz/src
#   find . -name '*.c' | grep -vP 'deprecated|legacy|/xz/' | sort | sed 's/^\./ ${LIBRARY_DIR}/'
# and then pruned by hand, because that listing also picks up files that must
# not be compiled into the library:
#   - liblzma/*/*_tablegen.c and lzmainfo/lzmainfo.c are standalone programs,
#     each with its own main();
#   - liblzma/check/crc32_small.c and crc64_small.c are the size-optimised
#     alternatives to crc32_fast.c / crc64_fast.c and define the same functions;
#   - common/tuklib_exit.c, tuklib_mbstr_*.c, tuklib_open_stdxxx.c and
#     tuklib_progname.c are helpers for the command-line tools, not for liblzma.
SET(Sources
    ${LIBRARY_DIR}/common/tuklib_cpucores.c
    ${LIBRARY_DIR}/common/tuklib_physmem.c
    ${LIBRARY_DIR}/liblzma/check/check.c
    ${LIBRARY_DIR}/liblzma/check/crc32_fast.c
    ${LIBRARY_DIR}/liblzma/check/crc32_table.c
    ${LIBRARY_DIR}/liblzma/check/crc64_fast.c
    ${LIBRARY_DIR}/liblzma/check/crc64_table.c
    ${LIBRARY_DIR}/liblzma/check/sha256.c
    ${LIBRARY_DIR}/liblzma/common/alone_decoder.c
    ${LIBRARY_DIR}/liblzma/common/alone_encoder.c
    ${LIBRARY_DIR}/liblzma/common/auto_decoder.c
    ${LIBRARY_DIR}/liblzma/common/block_buffer_decoder.c
    ${LIBRARY_DIR}/liblzma/common/block_buffer_encoder.c
    ${LIBRARY_DIR}/liblzma/common/block_decoder.c
    ${LIBRARY_DIR}/liblzma/common/block_encoder.c
    ${LIBRARY_DIR}/liblzma/common/block_header_decoder.c
    ${LIBRARY_DIR}/liblzma/common/block_header_encoder.c
    ${LIBRARY_DIR}/liblzma/common/block_util.c
    ${LIBRARY_DIR}/liblzma/common/common.c
    ${LIBRARY_DIR}/liblzma/common/easy_buffer_encoder.c
    ${LIBRARY_DIR}/liblzma/common/easy_decoder_memusage.c
    ${LIBRARY_DIR}/liblzma/common/easy_encoder.c
    ${LIBRARY_DIR}/liblzma/common/easy_encoder_memusage.c
    ${LIBRARY_DIR}/liblzma/common/easy_preset.c
    ${LIBRARY_DIR}/liblzma/common/file_info.c
    ${LIBRARY_DIR}/liblzma/common/filter_buffer_decoder.c
    ${LIBRARY_DIR}/liblzma/common/filter_buffer_encoder.c
    ${LIBRARY_DIR}/liblzma/common/filter_common.c
    ${LIBRARY_DIR}/liblzma/common/filter_decoder.c
    ${LIBRARY_DIR}/liblzma/common/filter_encoder.c
    ${LIBRARY_DIR}/liblzma/common/filter_flags_decoder.c
    ${LIBRARY_DIR}/liblzma/common/filter_flags_encoder.c
    ${LIBRARY_DIR}/liblzma/common/hardware_cputhreads.c
    ${LIBRARY_DIR}/liblzma/common/hardware_physmem.c
    ${LIBRARY_DIR}/liblzma/common/index.c
    ${LIBRARY_DIR}/liblzma/common/index_decoder.c
    ${LIBRARY_DIR}/liblzma/common/index_encoder.c
    ${LIBRARY_DIR}/liblzma/common/index_hash.c
    ${LIBRARY_DIR}/liblzma/common/outqueue.c
    ${LIBRARY_DIR}/liblzma/common/stream_buffer_decoder.c
    ${LIBRARY_DIR}/liblzma/common/stream_buffer_encoder.c
    ${LIBRARY_DIR}/liblzma/common/stream_decoder.c
    ${LIBRARY_DIR}/liblzma/common/stream_encoder.c
    ${LIBRARY_DIR}/liblzma/common/stream_encoder_mt.c
    ${LIBRARY_DIR}/liblzma/common/stream_flags_common.c
    ${LIBRARY_DIR}/liblzma/common/stream_flags_decoder.c
    ${LIBRARY_DIR}/liblzma/common/stream_flags_encoder.c
    ${LIBRARY_DIR}/liblzma/common/vli_decoder.c
    ${LIBRARY_DIR}/liblzma/common/vli_encoder.c
    ${LIBRARY_DIR}/liblzma/common/vli_size.c
    ${LIBRARY_DIR}/liblzma/delta/delta_common.c
    ${LIBRARY_DIR}/liblzma/delta/delta_decoder.c
    ${LIBRARY_DIR}/liblzma/delta/delta_encoder.c
    ${LIBRARY_DIR}/liblzma/lz/lz_decoder.c
    ${LIBRARY_DIR}/liblzma/lz/lz_encoder.c
    ${LIBRARY_DIR}/liblzma/lz/lz_encoder_mf.c
    ${LIBRARY_DIR}/liblzma/lzma/fastpos_table.c
    ${LIBRARY_DIR}/liblzma/lzma/lzma2_decoder.c
    ${LIBRARY_DIR}/liblzma/lzma/lzma2_encoder.c
    ${LIBRARY_DIR}/liblzma/lzma/lzma_decoder.c
    ${LIBRARY_DIR}/liblzma/lzma/lzma_encoder.c
    ${LIBRARY_DIR}/liblzma/lzma/lzma_encoder_optimum_fast.c
    ${LIBRARY_DIR}/liblzma/lzma/lzma_encoder_optimum_normal.c
    ${LIBRARY_DIR}/liblzma/lzma/lzma_encoder_presets.c
    ${LIBRARY_DIR}/liblzma/rangecoder/price_table.c
    ${LIBRARY_DIR}/liblzma/simple/arm.c
    ${LIBRARY_DIR}/liblzma/simple/armthumb.c
    ${LIBRARY_DIR}/liblzma/simple/ia64.c
    ${LIBRARY_DIR}/liblzma/simple/powerpc.c
    ${LIBRARY_DIR}/liblzma/simple/simple_coder.c
    ${LIBRARY_DIR}/liblzma/simple/simple_decoder.c
    ${LIBRARY_DIR}/liblzma/simple/simple_encoder.c
    ${LIBRARY_DIR}/liblzma/simple/sparc.c
    ${LIBRARY_DIR}/liblzma/simple/x86.c
)
# Header files of liblzma and of the shared tuklib helpers. They are passed to
# ADD_LIBRARY() together with the sources.
#
# The list was generated with:
# cd contrib/xz/src
# find . -name '*.h' | grep -vP 'deprecated|legacy|/xz/' | sort | sed 's/^\./ ${LIBRARY_DIR}/'
SET(Headers
${LIBRARY_DIR}/common/mythread.h
${LIBRARY_DIR}/common/sysdefs.h
${LIBRARY_DIR}/common/tuklib_common.h
${LIBRARY_DIR}/common/tuklib_config.h
${LIBRARY_DIR}/common/tuklib_cpucores.h
${LIBRARY_DIR}/common/tuklib_exit.h
${LIBRARY_DIR}/common/tuklib_gettext.h
${LIBRARY_DIR}/common/tuklib_integer.h
${LIBRARY_DIR}/common/tuklib_mbstr.h
${LIBRARY_DIR}/common/tuklib_open_stdxxx.h
${LIBRARY_DIR}/common/tuklib_physmem.h
${LIBRARY_DIR}/common/tuklib_progname.h
${LIBRARY_DIR}/liblzma/api/lzma/base.h
${LIBRARY_DIR}/liblzma/api/lzma/bcj.h
${LIBRARY_DIR}/liblzma/api/lzma/block.h
${LIBRARY_DIR}/liblzma/api/lzma/check.h
${LIBRARY_DIR}/liblzma/api/lzma/container.h
${LIBRARY_DIR}/liblzma/api/lzma/delta.h
${LIBRARY_DIR}/liblzma/api/lzma/filter.h
${LIBRARY_DIR}/liblzma/api/lzma.h
${LIBRARY_DIR}/liblzma/api/lzma/hardware.h
${LIBRARY_DIR}/liblzma/api/lzma/index.h
${LIBRARY_DIR}/liblzma/api/lzma/index_hash.h
${LIBRARY_DIR}/liblzma/api/lzma/lzma12.h
${LIBRARY_DIR}/liblzma/api/lzma/stream_flags.h
${LIBRARY_DIR}/liblzma/api/lzma/version.h
${LIBRARY_DIR}/liblzma/api/lzma/vli.h
${LIBRARY_DIR}/liblzma/check/check.h
${LIBRARY_DIR}/liblzma/check/crc32_table_be.h
${LIBRARY_DIR}/liblzma/check/crc32_table_le.h
${LIBRARY_DIR}/liblzma/check/crc64_table_be.h
${LIBRARY_DIR}/liblzma/check/crc64_table_le.h
${LIBRARY_DIR}/liblzma/check/crc_macros.h
${LIBRARY_DIR}/liblzma/common/alone_decoder.h
${LIBRARY_DIR}/liblzma/common/block_buffer_encoder.h
${LIBRARY_DIR}/liblzma/common/block_decoder.h
${LIBRARY_DIR}/liblzma/common/block_encoder.h
${LIBRARY_DIR}/liblzma/common/common.h
${LIBRARY_DIR}/liblzma/common/easy_preset.h
${LIBRARY_DIR}/liblzma/common/filter_common.h
${LIBRARY_DIR}/liblzma/common/filter_decoder.h
${LIBRARY_DIR}/liblzma/common/filter_encoder.h
${LIBRARY_DIR}/liblzma/common/index_decoder.h
${LIBRARY_DIR}/liblzma/common/index_encoder.h
${LIBRARY_DIR}/liblzma/common/index.h
${LIBRARY_DIR}/liblzma/common/memcmplen.h
${LIBRARY_DIR}/liblzma/common/outqueue.h
${LIBRARY_DIR}/liblzma/common/stream_decoder.h
${LIBRARY_DIR}/liblzma/common/stream_flags_common.h
${LIBRARY_DIR}/liblzma/delta/delta_common.h
${LIBRARY_DIR}/liblzma/delta/delta_decoder.h
${LIBRARY_DIR}/liblzma/delta/delta_encoder.h
${LIBRARY_DIR}/liblzma/delta/delta_private.h
${LIBRARY_DIR}/liblzma/lz/lz_decoder.h
${LIBRARY_DIR}/liblzma/lz/lz_encoder.h
${LIBRARY_DIR}/liblzma/lz/lz_encoder_hash.h
${LIBRARY_DIR}/liblzma/lz/lz_encoder_hash_table.h
${LIBRARY_DIR}/liblzma/lzma/fastpos.h
${LIBRARY_DIR}/liblzma/lzma/lzma2_decoder.h
${LIBRARY_DIR}/liblzma/lzma/lzma2_encoder.h
${LIBRARY_DIR}/liblzma/lzma/lzma_common.h
${LIBRARY_DIR}/liblzma/lzma/lzma_decoder.h
${LIBRARY_DIR}/liblzma/lzma/lzma_encoder.h
${LIBRARY_DIR}/liblzma/lzma/lzma_encoder_private.h
${LIBRARY_DIR}/liblzma/rangecoder/price.h
${LIBRARY_DIR}/liblzma/rangecoder/range_common.h
${LIBRARY_DIR}/liblzma/rangecoder/range_decoder.h
${LIBRARY_DIR}/liblzma/rangecoder/range_encoder.h
${LIBRARY_DIR}/liblzma/simple/simple_coder.h
${LIBRARY_DIR}/liblzma/simple/simple_decoder.h
${LIBRARY_DIR}/liblzma/simple/simple_encoder.h
${LIBRARY_DIR}/liblzma/simple/simple_private.h
)
# Build the liblzma target from the Sources and Headers lists.
# NOTE(review): this file sets no compile definitions for the target; upstream's
# build enables checks/encoders/decoders through HAVE_* macros - confirm they
# are supplied before relying on this target.
ADD_LIBRARY(liblzma ${Sources} ${Headers})

# Only the public API directory (lzma.h and lzma/*.h) is propagated to targets
# that link against liblzma.
target_include_directories(liblzma PUBLIC
    ${LIBRARY_DIR}/liblzma/api
)

# Internal directories are needed only to compile liblzma itself. Keeping them
# PRIVATE stops generically named internal headers (common.h, index.h, check.h,
# ...) from landing on the include path of every consumer.
target_include_directories(liblzma PRIVATE
    ${LIBRARY_DIR}/liblzma/common
    ${LIBRARY_DIR}/liblzma/check
    ${LIBRARY_DIR}/liblzma/lz
    ${LIBRARY_DIR}/liblzma/rangecoder
    ${LIBRARY_DIR}/liblzma/lzma
    ${LIBRARY_DIR}/liblzma/delta
    ${LIBRARY_DIR}/liblzma/simple
    ${LIBRARY_DIR}/common
)

View File

@ -6,6 +6,8 @@
#include <IO/ZlibDeflatingWriteBuffer.h>
#include <IO/BrotliReadBuffer.h>
#include <IO/BrotliWriteBuffer.h>
#include <IO/LzmaReadBuffer.h>
#include <IO/LzmaWriteBuffer.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
@ -28,6 +30,7 @@ std::string toContentEncodingName(CompressionMethod method)
case CompressionMethod::Gzip: return "gzip";
case CompressionMethod::Zlib: return "deflate";
case CompressionMethod::Brotli: return "br";
case CompressionMethod::Xz: return "xz";
case CompressionMethod::None: return "";
}
__builtin_unreachable();
@ -73,6 +76,8 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
if (method == CompressionMethod::Brotli)
return std::make_unique<BrotliReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
#endif
if (method == CompressionMethod::Xz)
return std::make_unique<LzmaReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
if (method == CompressionMethod::None)
return nested;

View File

@ -1,14 +1,13 @@
#pragma once
#include <string>
#include <memory>
#include <string>
#include <Core/Defines.h>
namespace DB
{
class ReadBuffer;
class WriteBuffer;
@ -26,6 +25,9 @@ enum class CompressionMethod
/// DEFLATE compression with zlib header and Adler32 checksum.
/// This option corresponds to HTTP Content-Encoding: deflate.
Zlib,
/// LZMA2-based content compression
/// This option corresponds to HTTP Content-Encoding: xz
Xz,
Brotli
};

72
src/IO/LzmaReadBuffer.cpp Normal file
View File

@ -0,0 +1,72 @@
#include <IO/LzmaReadBuffer.h>
namespace DB
{
namespace ErrorCodes
{
// Error code attached to every exception thrown by LzmaReadBuffer in this file.
// Only declared here; NOTE(review): confirm the matching definition exists,
// since it is not visible in this translation unit.
extern const int LZMA_STREAM_DECODER_FAILED;
}
/// Wraps `in_` in a liblzma stream decoder.
///
/// `buf_size`, `existing_memory` and `alignment` are forwarded unchanged to
/// BufferWithOwnMemory. Their default values are specified once, on the
/// declaration in LzmaReadBuffer.h; repeating them on this out-of-class
/// definition is ill-formed.
///
/// Throws Exception(LZMA_STREAM_DECODER_FAILED) if lzma_stream_decoder() does
/// not return LZMA_OK.
LzmaReadBuffer::LzmaReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment)
    : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment), in(std::move(in_)), eof(false)
{
    // LZMA_STREAM_INIT resets every field of the stream (allocator, next_in,
    // avail_in, next_out, avail_out, ...), so no field-by-field setup is needed.
    lstr = LZMA_STREAM_INIT;

    // 500 MiB. The shift must be done in 64 bits: `500 << 30` is evaluated as
    // `int` and overflows before it is ever widened to uint64_t.
    const uint64_t memlimit = 500ULL << 20;

    // NOTE(review): LZMA_IGNORE_CHECK is kept from the original call - confirm
    // that skipping the integrity check of the decoded data is intended.
    const lzma_ret ret = lzma_stream_decoder(&lstr, memlimit, LZMA_CONCATENATED | LZMA_IGNORE_CHECK);

    // lzma does not provide api for converting error code to string unlike zlib
    if (ret != LZMA_OK)
        throw Exception(
            std::string("lzma_stream_decoder failed: error code: ") + std::to_string(ret) + "; lzma version: " + LZMA_VERSION_STRING,
            ErrorCodes::LZMA_STREAM_DECODER_FAILED);
}
/// Hands the decoder stream `lstr` back to liblzma via lzma_end().
LzmaReadBuffer::~LzmaReadBuffer()
{
lzma_end(&lstr);
}
bool LzmaReadBuffer::nextImpl()
{
if (eof)
{
return false;
}
if (!lstr.avail_in)
{
in->nextIfAtEnd();
lstr.next_in = reinterpret_cast<unsigned char *>(in->position());
lstr.avail_in = in->buffer().end() - in->position();
}
lstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
lstr.avail_out = internal_buffer.size();
lzma_ret ret = lzma_code(&lstr, LZMA_FINISH);
in->position() = in->buffer().end() - lstr.avail_in;
if (ret == LZMA_STREAM_END)
{
if (in->eof())
{
eof = true;
return working_buffer.size() != 0;
}
}
if (ret != LZMA_OK)
throw Exception(
std::string("lzma_stream_decoder failed: error code: ") + std::to_string(ret) + "; lzma version: " + LZMA_VERSION_STRING,
ErrorCodes::LZMA_STREAM_DECODER_FAILED);
return true
}
}

34
src/IO/LzmaReadBuffer.h Normal file
View File

@ -0,0 +1,34 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/CompressionMethod.h>
#include <IO/ReadBuffer.h>
#include <lzma.h>
namespace DB
{
namespace ErrorCodes
{
}
/// ReadBuffer that pulls compressed bytes from a nested ReadBuffer and exposes
/// the data decoded by liblzma.
class LzmaReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
public:
    /// Takes ownership of `in_`. The remaining arguments are forwarded to
    /// BufferWithOwnMemory.
    LzmaReadBuffer(
        std::unique_ptr<ReadBuffer> in_,
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        char * existing_memory = nullptr,
        size_t alignment = 0);

    ~LzmaReadBuffer() override;

private:
    bool nextImpl() override;

    /// Source of compressed data.
    std::unique_ptr<ReadBuffer> in;

    /// liblzma decoder state; starts from the library's documented initial value.
    lzma_stream lstr = LZMA_STREAM_INIT;

    /// Set by nextImpl() once decoding has finished. It is tested at the top of
    /// every nextImpl() call, so it must start out as false.
    bool eof = false;
};
}

View File

@ -0,0 +1 @@
#include <IO/LzmaWriteBuffer.h>

10
src/IO/LzmaWriteBuffer.h Normal file
View File

@ -0,0 +1,10 @@
#pragma once
#include <lzma.h>
namespace DB {
}