2022-09-28 13:29:29 +00:00
|
|
|
#include "config.h"
|
2021-05-23 22:18:12 +00:00
|
|
|
|
2018-10-11 02:57:48 +00:00
|
|
|
#include <Compression/CompressionFactory.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <Poco/String.h>
|
|
|
|
#include <IO/ReadBuffer.h>
|
|
|
|
#include <Parsers/queryToString.h>
|
2024-03-18 15:02:54 +00:00
|
|
|
#include <Parsers/parseQuery.h>
|
|
|
|
#include <Parsers/ExpressionElementParsers.h>
|
2018-10-11 02:57:48 +00:00
|
|
|
#include <Compression/CompressionCodecMultiple.h>
|
2020-09-22 12:49:55 +00:00
|
|
|
#include <Compression/CompressionCodecNone.h>
|
2018-10-11 02:57:48 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
|
|
|
|
2020-08-28 17:40:45 +00:00
|
|
|
#include <boost/algorithm/string/join.hpp>
|
2018-10-11 02:57:48 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2021-05-23 01:14:29 +00:00
|
|
|
|
2018-10-11 02:57:48 +00:00
|
|
|
/// Error codes used by the exceptions thrown in this file.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int UNKNOWN_CODEC;
    extern const int UNEXPECTED_AST_STRUCTURE;
    extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
}
|
|
|
|
|
|
|
|
/// Returns the codec stored in `default_codec`.
CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const
{
    return this->default_codec;
}
|
|
|
|
|
2018-12-20 17:37:02 +00:00
|
|
|
|
2021-05-24 03:43:25 +00:00
|
|
|
/// Builds a codec from a family name and an optional level.
///
/// The family name is upper-cased and wrapped into a `CODEC(...)` AST, which is then passed
/// to the AST-based overload with no column type:
///  - without a level the family is represented by an identifier: CODEC(NAME);
///  - with a level the family is represented by a function call: CODEC(NAME(level)),
///    where the level is converted to UInt64.
CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std::optional<int> level) const
{
    if (!level)
    {
        auto family_identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
        return get(makeASTFunction("CODEC", family_identifier), {});
    }

    auto level_argument = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
    auto family_with_level = makeASTFunction(Poco::toUpper(family_name), level_argument);
    return get(makeASTFunction("CODEC", family_with_level), {});
}
|
|
|
|
|
2024-03-18 15:02:54 +00:00
|
|
|
/// Builds a codec from its textual description.
///
/// The description is upper-cased, wrapped in parentheses and parsed with ParserCodec;
/// the resulting AST is resolved through the factory singleton with no column type.
CompressionCodecPtr CompressionCodecFactory::get(const String & compression_codec) const
{
    const String parenthesized_codec = "(" + Poco::toUpper(compression_codec) + ")";

    ParserCodec parser;
    auto codec_ast = parseQuery(
        parser, parenthesized_codec, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);

    return CompressionCodecFactory::instance().get(codec_ast, nullptr);
}
|
2021-05-23 00:55:16 +00:00
|
|
|
|
|
|
|
/// Builds a codec from a codec-description AST.
///
/// `ast` is expected to be an ASTFunction. Each child of its argument list describes one codec,
/// either as an ASTIdentifier (family name without arguments) or as an ASTFunction
/// (family name with arguments).
///
/// A family name equal to DEFAULT_CODEC_NAME resolves to `current_default`, or to the factory's
/// `default_codec` when `current_default` is null. Any other name is resolved via getImpl(),
/// which also receives `column_type`.
/// When `only_generic` is set, codecs whose isGenericCompression() returns false are skipped.
///
/// Returns the codec itself if exactly one was collected, a CompressionCodecMultiple if several
/// were collected, and a CompressionCodecNone if none was collected.
/// Throws UNEXPECTED_AST_STRUCTURE if `ast` is not a function or one of its elements is
/// neither an identifier nor a function.
CompressionCodecPtr CompressionCodecFactory::get(
    const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const
{
    if (current_default == nullptr)
        current_default = default_codec;

    if (const auto * func = ast->as<ASTFunction>())
    {
        Codecs codecs;
        codecs.reserve(func->arguments->children.size());
        for (const auto & inner_codec_ast : func->arguments->children)
        {
            String codec_family_name;
            ASTPtr codec_arguments;
            if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
            {
                /// Bare family name: the codec has no arguments.
                codec_family_name = family_name->name();
                codec_arguments = {};
            }
            else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
            {
                codec_family_name = ast_func->name;
                codec_arguments = ast_func->arguments;
            }
            else
                throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for compression codec");

            CompressionCodecPtr codec;
            if (codec_family_name == DEFAULT_CODEC_NAME)
                codec = current_default;
            else
                codec = getImpl(codec_family_name, codec_arguments, column_type);

            if (only_generic && !codec->isGenericCompression())
                continue;

            codecs.emplace_back(codec);
        }

        if (codecs.size() == 1)
            return codecs.back();
        else if (codecs.size() > 1)
            return std::make_shared<CompressionCodecMultiple>(codecs);
        else
            return std::make_shared<CompressionCodecNone>();
    }

    throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST structure for compression codec: {}", queryToString(ast));
}
|
|
|
|
|
2021-05-23 00:55:16 +00:00
|
|
|
|
2022-03-12 17:38:11 +00:00
|
|
|
/// Builds a codec from its family byte code.
///
/// The creator registered for `byte_code` is invoked with empty arguments and no column type.
/// Throws UNKNOWN_CODEC if no creator is registered for this code.
CompressionCodecPtr CompressionCodecFactory::get(uint8_t byte_code) const
{
    const auto creator_it = family_code_with_codec.find(byte_code);
    if (creator_it != family_code_with_codec.end())
        return creator_it->second({}, nullptr);

    throw Exception(ErrorCodes::UNKNOWN_CODEC, "Unknown codec family code: {}", toString(byte_code));
}
|
|
|
|
|
2019-01-15 14:20:34 +00:00
|
|
|
|
2020-09-21 11:24:10 +00:00
|
|
|
/// Creates a codec of the given family by calling the creator registered under `family_name`
/// with `arguments` and `column_type`.
///
/// Throws UNKNOWN_CODEC if the name is exactly "Multiple" (it cannot be requested directly)
/// or if no creator is registered under this name.
CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const
{
    if (family_name == "Multiple")
        throw Exception(ErrorCodes::UNKNOWN_CODEC, "Codec Multiple cannot be specified directly");

    const auto creator_it = family_name_with_codec.find(family_name);
    if (creator_it != family_name_with_codec.end())
        return creator_it->second(arguments, column_type);

    throw Exception(ErrorCodes::UNKNOWN_CODEC, "Unknown codec family: {}", family_name);
}
|
|
|
|
|
2019-01-15 14:20:34 +00:00
|
|
|
void CompressionCodecFactory::registerCompressionCodecWithType(
|
|
|
|
const String & family_name,
|
2020-01-03 14:39:24 +00:00
|
|
|
std::optional<uint8_t> byte_code,
|
2019-01-15 14:20:34 +00:00
|
|
|
CreatorWithType creator)
|
2018-10-11 02:57:48 +00:00
|
|
|
{
|
|
|
|
if (creator == nullptr)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "CompressionCodecFactory: "
|
|
|
|
"the codec family {} has been provided a null constructor", family_name);
|
2018-10-11 02:57:48 +00:00
|
|
|
|
|
|
|
if (!family_name_with_codec.emplace(family_name, creator).second)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "CompressionCodecFactory: the codec family name '{}' is not unique", family_name);
|
2018-10-11 02:57:48 +00:00
|
|
|
|
2018-12-21 14:03:53 +00:00
|
|
|
if (byte_code)
|
|
|
|
if (!family_code_with_codec.emplace(*byte_code, creator).second)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
|
|
"CompressionCodecFactory: the codec family code '{}' is not unique",
|
|
|
|
std::to_string(*byte_code));
|
2018-10-11 02:57:48 +00:00
|
|
|
}
|
|
|
|
|
2020-01-03 14:39:24 +00:00
|
|
|
void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, Creator creator)
|
2019-01-15 14:20:34 +00:00
|
|
|
{
|
2020-09-21 11:24:10 +00:00
|
|
|
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, const IDataType * /* data_type */)
|
2019-01-15 14:20:34 +00:00
|
|
|
{
|
|
|
|
return creator(ast);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
void CompressionCodecFactory::registerSimpleCompressionCodec(
|
|
|
|
const String & family_name,
|
2020-01-03 14:39:24 +00:00
|
|
|
std::optional<uint8_t> byte_code,
|
2019-01-15 14:20:34 +00:00
|
|
|
SimpleCreator creator)
|
2018-10-11 02:57:48 +00:00
|
|
|
{
|
|
|
|
registerCompressionCodec(family_name, byte_code, [family_name, creator](const ASTPtr & ast)
|
|
|
|
{
|
|
|
|
if (ast)
|
2021-05-23 00:55:16 +00:00
|
|
|
throw Exception(ErrorCodes::DATA_TYPE_CANNOT_HAVE_ARGUMENTS, "Compression codec {} cannot have arguments", family_name);
|
2018-10-11 02:57:48 +00:00
|
|
|
return creator();
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2018-12-20 17:37:02 +00:00
|
|
|
|
2018-10-11 02:57:48 +00:00
|
|
|
void registerCodecNone(CompressionCodecFactory & factory);
|
2020-07-09 00:46:00 +00:00
|
|
|
void registerCodecLZ4(CompressionCodecFactory & factory);
|
|
|
|
void registerCodecLZ4HC(CompressionCodecFactory & factory);
|
2018-10-11 02:57:48 +00:00
|
|
|
void registerCodecZSTD(CompressionCodecFactory & factory);
|
2024-01-07 17:47:31 +00:00
|
|
|
#ifdef ENABLE_ZSTD_QAT_CODEC
|
2023-12-13 16:02:06 +00:00
|
|
|
void registerCodecZSTDQAT(CompressionCodecFactory & factory);
|
|
|
|
#endif
|
2022-03-16 10:37:18 +00:00
|
|
|
void registerCodecMultiple(CompressionCodecFactory & factory);
|
2023-09-05 12:35:39 +00:00
|
|
|
#ifdef ENABLE_QPL_COMPRESSION
|
2022-07-09 18:42:01 +00:00
|
|
|
void registerCodecDeflateQpl(CompressionCodecFactory & factory);
|
2023-09-05 12:35:39 +00:00
|
|
|
#endif
|
2022-03-16 10:37:18 +00:00
|
|
|
|
2022-03-28 09:25:08 +00:00
|
|
|
/// Keeper use only general-purpose codecs, so we don't need these special codecs
|
2022-03-16 10:37:18 +00:00
|
|
|
/// in standalone build
|
2023-09-01 10:55:58 +00:00
|
|
|
#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD
|
2019-01-14 11:54:40 +00:00
|
|
|
void registerCodecDelta(CompressionCodecFactory & factory);
|
2019-06-06 17:11:31 +00:00
|
|
|
void registerCodecT64(CompressionCodecFactory & factory);
|
2019-06-03 09:56:50 +00:00
|
|
|
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
|
2019-06-12 17:12:08 +00:00
|
|
|
void registerCodecGorilla(CompressionCodecFactory & factory);
|
2021-07-30 09:12:33 +00:00
|
|
|
void registerCodecEncrypted(CompressionCodecFactory & factory);
|
2022-05-25 19:04:39 +00:00
|
|
|
void registerCodecFPC(CompressionCodecFactory & factory);
|
2023-08-08 17:26:01 +00:00
|
|
|
void registerCodecGCD(CompressionCodecFactory & factory);
|
2022-03-16 10:37:18 +00:00
|
|
|
#endif
|
2021-05-23 22:18:12 +00:00
|
|
|
|
2018-10-11 02:57:48 +00:00
|
|
|
/// Registers every codec enabled in this build and then selects LZ4 as the default codec.
CompressionCodecFactory::CompressionCodecFactory()
{
    /// Codecs registered in every build (ZSTD_QAT only when enabled).
    registerCodecNone(*this);
    registerCodecLZ4(*this);
    registerCodecZSTD(*this);
#ifdef ENABLE_ZSTD_QAT_CODEC
    registerCodecZSTDQAT(*this);
#endif
    registerCodecLZ4HC(*this);
    registerCodecMultiple(*this);

    /// Codecs that are left out of the standalone Keeper build.
#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD
    registerCodecDelta(*this);
    registerCodecT64(*this);
    registerCodecDoubleDelta(*this);
    registerCodecGorilla(*this);
    registerCodecEncrypted(*this);
    registerCodecFPC(*this);
#ifdef ENABLE_QPL_COMPRESSION
    registerCodecDeflateQpl(*this);
#endif
    registerCodecGCD(*this);
#endif

    /// Must come after the registrations above: the lookup goes through the registered creators.
    default_codec = get("LZ4", std::nullopt);
}
|
|
|
|
|
2019-08-22 03:24:05 +00:00
|
|
|
/// Returns the single factory object, which is a function-local static.
CompressionCodecFactory & CompressionCodecFactory::instance()
{
    static CompressionCodecFactory factory;
    return factory;
}
|
|
|
|
|
2018-12-13 15:26:28 +00:00
|
|
|
}
|