2020-01-04 07:31:00 +00:00
|
|
|
#include <IO/CompressionMethod.h>
|
|
|
|
|
2020-11-09 22:52:22 +00:00
|
|
|
#include <IO/BrotliReadBuffer.h>
|
|
|
|
#include <IO/BrotliWriteBuffer.h>
|
|
|
|
#include <IO/LZMADeflatingWriteBuffer.h>
|
|
|
|
#include <IO/LZMAInflatingReadBuffer.h>
|
2020-01-04 07:31:00 +00:00
|
|
|
#include <IO/ReadBuffer.h>
|
|
|
|
#include <IO/WriteBuffer.h>
|
|
|
|
#include <IO/ZlibDeflatingWriteBuffer.h>
|
2020-11-09 22:52:22 +00:00
|
|
|
#include <IO/ZlibInflatingReadBuffer.h>
|
2020-11-17 16:02:10 +00:00
|
|
|
#include <IO/ZstdDeflatingWriteBuffer.h>
|
|
|
|
#include <IO/ZstdInflatingReadBuffer.h>
|
2021-06-16 05:43:07 +00:00
|
|
|
#include <IO/Lz4DeflatingWriteBuffer.h>
|
|
|
|
#include <IO/Lz4InflatingReadBuffer.h>
|
2021-08-06 23:55:04 +00:00
|
|
|
#include <IO/Bzip2ReadBuffer.h>
|
|
|
|
#include <IO/Bzip2WriteBuffer.h>
|
2021-11-05 11:55:30 +00:00
|
|
|
#include <IO/HadoopSnappyReadBuffer.h>
|
2020-01-04 07:31:00 +00:00
|
|
|
|
2022-09-28 08:45:15 +00:00
|
|
|
#include "config.h"
|
2020-01-04 07:31:00 +00:00
|
|
|
|
2021-03-03 11:51:09 +00:00
|
|
|
#include <boost/algorithm/string/case_conv.hpp>
|
|
|
|
|
2020-01-04 07:31:00 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int NOT_IMPLEMENTED;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::string toContentEncodingName(CompressionMethod method)
|
|
|
|
{
|
|
|
|
switch (method)
|
|
|
|
{
|
2020-11-09 22:52:22 +00:00
|
|
|
case CompressionMethod::Gzip:
|
|
|
|
return "gzip";
|
|
|
|
case CompressionMethod::Zlib:
|
|
|
|
return "deflate";
|
|
|
|
case CompressionMethod::Brotli:
|
|
|
|
return "br";
|
|
|
|
case CompressionMethod::Xz:
|
|
|
|
return "xz";
|
2020-11-17 16:02:10 +00:00
|
|
|
case CompressionMethod::Zstd:
|
|
|
|
return "zstd";
|
2021-06-16 05:43:07 +00:00
|
|
|
case CompressionMethod::Lz4:
|
|
|
|
return "lz4";
|
2021-08-06 23:55:04 +00:00
|
|
|
case CompressionMethod::Bzip2:
|
|
|
|
return "bz2";
|
2021-11-05 11:55:30 +00:00
|
|
|
case CompressionMethod::Snappy:
|
|
|
|
return "snappy";
|
2020-11-09 22:52:22 +00:00
|
|
|
case CompressionMethod::None:
|
|
|
|
return "";
|
2020-01-04 07:31:00 +00:00
|
|
|
}
|
2022-10-07 19:20:14 +00:00
|
|
|
UNREACHABLE();
|
2020-01-04 07:31:00 +00:00
|
|
|
}
|
|
|
|
|
2022-10-04 03:13:46 +00:00
|
|
|
/// Picks a compression method based on the encoding names that occur in `list`.
/// Returns `CompressionMethod::None` when none of the known names is present.
CompressionMethod chooseHTTPCompressionMethod(const std::string & list)
{
    struct Candidate
    {
        const char * token;
        CompressionMethod method;
    };

    /// The compression methods are ordered from most to least preferred.
    static constexpr Candidate candidates[] =
    {
        {"zstd", CompressionMethod::Zstd},
        {"br", CompressionMethod::Brotli},
        {"lz4", CompressionMethod::Lz4},
        {"snappy", CompressionMethod::Snappy},
        {"gzip", CompressionMethod::Gzip},
        {"deflate", CompressionMethod::Zlib},
        {"xz", CompressionMethod::Xz},
        {"bz2", CompressionMethod::Bzip2},
    };

    /// A plain substring search is used: the first candidate whose token occurs anywhere in `list` wins.
    for (const auto & [token, candidate_method] : candidates)
    {
        if (list.find(token) != std::string::npos)
            return candidate_method;
    }

    return CompressionMethod::None;
}
|
|
|
|
|
2020-01-04 07:31:00 +00:00
|
|
|
/// Chooses the compression method for `path`.
///
/// `hint` names the method explicitly and is matched case-insensitively.
/// When `hint` is empty or "auto", the method is deduced from the extension of `path`
/// (the text after the last '.'); a missing or unrecognised extension then means no compression.
/// The hint "none" explicitly selects `CompressionMethod::None`.
///
/// Throws NOT_IMPLEMENTED when an explicit hint is not one of the known method names.
CompressionMethod chooseCompressionMethod(const std::string & path, const std::string & hint)
{
    /// Normalise the hint once, so that the special values "auto" and "none" are recognised
    /// case-insensitively, exactly like the method names below ("GZIP" and "NONE" behave alike).
    std::string normalized_hint = hint;
    boost::algorithm::to_lower(normalized_hint);

    const bool deduce_from_path = normalized_hint.empty() || normalized_hint == "auto";

    std::string method_str;
    if (deduce_from_path)
    {
        const auto pos = path.find_last_of('.');
        if (pos != std::string::npos)
        {
            method_str = path.substr(pos + 1);
            boost::algorithm::to_lower(method_str);
        }
    }

    /// Either an explicit hint was given or the path has no (non-empty) extension:
    /// match against the hint itself.
    if (method_str.empty())
        method_str = normalized_hint;

    if (method_str == "gzip" || method_str == "gz")
        return CompressionMethod::Gzip;
    if (method_str == "deflate")
        return CompressionMethod::Zlib;
    if (method_str == "brotli" || method_str == "br")
        return CompressionMethod::Brotli;
    if (method_str == "lzma" || method_str == "xz")
        return CompressionMethod::Xz;
    if (method_str == "zstd" || method_str == "zst")
        return CompressionMethod::Zstd;
    if (method_str == "lz4")
        return CompressionMethod::Lz4;
    if (method_str == "bz2")
        return CompressionMethod::Bzip2;
    if (method_str == "snappy")
        return CompressionMethod::Snappy;

    /// Automatic detection that found nothing, or an explicit "none": the data is not compressed.
    if (deduce_from_path || method_str == "none")
        return CompressionMethod::None;

    /// Report the hint exactly as the caller wrote it.
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown compression method '{}'. "
        "Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'lz4', 'bz2', 'snappy' are supported as compression methods", hint);
}
|
|
|
|
|
2022-07-07 01:47:33 +00:00
|
|
|
std::pair<uint64_t, uint64_t> getCompressionLevelRange(const CompressionMethod & method)
|
|
|
|
{
|
|
|
|
switch (method)
|
|
|
|
{
|
|
|
|
case CompressionMethod::Zstd:
|
|
|
|
return {1, 22};
|
|
|
|
case CompressionMethod::Lz4:
|
|
|
|
return {1, 12};
|
|
|
|
default:
|
|
|
|
return {1, 9};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-04-15 23:56:45 +00:00
|
|
|
static std::unique_ptr<CompressedReadBufferWrapper> createCompressedWrapper(
|
2022-06-18 12:55:35 +00:00
|
|
|
std::unique_ptr<ReadBuffer> nested, CompressionMethod method, size_t buf_size, char * existing_memory, size_t alignment, int zstd_window_log_max)
|
2020-01-04 07:31:00 +00:00
|
|
|
{
|
|
|
|
if (method == CompressionMethod::Gzip || method == CompressionMethod::Zlib)
|
2020-01-04 22:59:08 +00:00
|
|
|
return std::make_unique<ZlibInflatingReadBuffer>(std::move(nested), method, buf_size, existing_memory, alignment);
|
2020-01-04 07:31:00 +00:00
|
|
|
#if USE_BROTLI
|
|
|
|
if (method == CompressionMethod::Brotli)
|
2020-01-04 22:59:08 +00:00
|
|
|
return std::make_unique<BrotliReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
2020-01-04 07:31:00 +00:00
|
|
|
#endif
|
2020-10-31 23:56:41 +00:00
|
|
|
if (method == CompressionMethod::Xz)
|
2020-11-09 22:52:22 +00:00
|
|
|
return std::make_unique<LZMAInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
2020-11-17 16:02:10 +00:00
|
|
|
if (method == CompressionMethod::Zstd)
|
2022-06-18 12:55:35 +00:00
|
|
|
return std::make_unique<ZstdInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment, zstd_window_log_max);
|
2021-06-16 05:43:07 +00:00
|
|
|
if (method == CompressionMethod::Lz4)
|
|
|
|
return std::make_unique<Lz4InflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
2021-08-06 23:55:04 +00:00
|
|
|
#if USE_BZIP2
|
|
|
|
if (method == CompressionMethod::Bzip2)
|
|
|
|
return std::make_unique<Bzip2ReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
|
|
|
#endif
|
2021-11-05 11:55:30 +00:00
|
|
|
#if USE_SNAPPY
|
|
|
|
if (method == CompressionMethod::Snappy)
|
|
|
|
return std::make_unique<HadoopSnappyReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
|
|
|
#endif
|
|
|
|
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method");
|
2022-04-15 23:56:45 +00:00
|
|
|
}
|
|
|
|
|
2022-05-08 06:02:40 +00:00
|
|
|
std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
|
2022-06-18 12:55:35 +00:00
|
|
|
std::unique_ptr<ReadBuffer> nested, CompressionMethod method, int zstd_window_log_max, size_t buf_size, char * existing_memory, size_t alignment)
|
2022-05-08 06:02:40 +00:00
|
|
|
{
|
|
|
|
if (method == CompressionMethod::None)
|
|
|
|
return nested;
|
2022-06-18 12:55:35 +00:00
|
|
|
return createCompressedWrapper(std::move(nested), method, buf_size, existing_memory, alignment, zstd_window_log_max);
|
2022-05-08 06:02:40 +00:00
|
|
|
}
|
|
|
|
|
2020-01-04 22:59:08 +00:00
|
|
|
std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
|
2020-11-09 22:52:22 +00:00
|
|
|
std::unique_ptr<WriteBuffer> nested, CompressionMethod method, int level, size_t buf_size, char * existing_memory, size_t alignment)
|
2020-01-04 07:31:00 +00:00
|
|
|
{
|
|
|
|
if (method == DB::CompressionMethod::Gzip || method == CompressionMethod::Zlib)
|
2020-01-04 22:59:08 +00:00
|
|
|
return std::make_unique<ZlibDeflatingWriteBuffer>(std::move(nested), method, level, buf_size, existing_memory, alignment);
|
2020-01-04 07:31:00 +00:00
|
|
|
|
|
|
|
#if USE_BROTLI
|
|
|
|
if (method == DB::CompressionMethod::Brotli)
|
2020-01-04 22:59:08 +00:00
|
|
|
return std::make_unique<BrotliWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
2020-01-04 07:31:00 +00:00
|
|
|
#endif
|
2020-11-09 22:52:22 +00:00
|
|
|
if (method == CompressionMethod::Xz)
|
|
|
|
return std::make_unique<LZMADeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
2020-01-04 07:31:00 +00:00
|
|
|
|
2020-11-17 16:02:10 +00:00
|
|
|
if (method == CompressionMethod::Zstd)
|
|
|
|
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
|
|
|
|
2021-06-16 05:43:07 +00:00
|
|
|
if (method == CompressionMethod::Lz4)
|
|
|
|
return std::make_unique<Lz4DeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
|
|
|
|
2021-08-06 23:55:04 +00:00
|
|
|
#if USE_BZIP2
|
|
|
|
if (method == CompressionMethod::Bzip2)
|
|
|
|
return std::make_unique<Bzip2WriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
2021-11-05 11:55:30 +00:00
|
|
|
#endif
|
|
|
|
#if USE_SNAPPY
|
|
|
|
if (method == CompressionMethod::Snappy)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method");
|
2021-08-06 23:55:04 +00:00
|
|
|
#endif
|
2020-01-04 07:31:00 +00:00
|
|
|
if (method == CompressionMethod::None)
|
|
|
|
return nested;
|
|
|
|
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method");
|
2020-01-04 07:31:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|