Merge pull request #4874 from yandex/fix_compressor_codecs

Fix compressor codecs
This commit is contained in:
alexey-milovidov 2019-04-01 22:27:25 +03:00 committed by GitHub
commit 08943fcfdd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 18 deletions

View File

@ -1,6 +1,7 @@
#include <iostream>
#include <optional>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/join.hpp>
#include <Common/Exception.h>
#include <IO/WriteBufferFromFileDescriptor.h>
@ -9,6 +10,8 @@
#include <Compression/CompressedReadBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Compression/CompressionFactory.h>
@ -64,7 +67,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("codec", boost::program_options::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", boost::program_options::value<std::vector<int>>()->multitoken(), "compression levels for codecs specified via --codec")
("level", boost::program_options::value<int>(), "compression level for codecs spicified via flags")
("none", "use no compression instead of LZ4")
("stat", "print block statistics of compressed data")
;
@ -94,6 +97,9 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
if ((use_lz4hc || use_zstd || use_none) && !codecs.empty())
throw DB::Exception("Wrong options, codec flags like --zstd and --codec options are mutually exclusive", DB::ErrorCodes::BAD_ARGUMENTS);
if (!codecs.empty() && options.count("level"))
throw DB::Exception("Wrong options, --level is not compatible with --codec list", DB::ErrorCodes::BAD_ARGUMENTS);
std::string method_family = "LZ4";
if (use_lz4hc)
@ -103,28 +109,22 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
else if (use_none)
method_family = "NONE";
std::vector<int> levels;
std::optional<int> level = std::nullopt;
if (options.count("level"))
levels = options["level"].as<std::vector<int>>();
level = options["level"].as<int>();
DB::CompressionCodecPtr codec;
if (!codecs.empty())
{
if (levels.size() > codecs.size())
throw DB::Exception("Specified more levels than codecs", DB::ErrorCodes::BAD_ARGUMENTS);
DB::ParserCodec codec_parser;
std::vector<DB::CodecNameWithLevel> codec_names;
for (size_t i = 0; i < codecs.size(); ++i)
{
if (i < levels.size())
codec_names.emplace_back(codecs[i], levels[i]);
else
codec_names.emplace_back(codecs[i], std::nullopt);
}
codec = DB::CompressionCodecFactory::instance().get(codec_names);
std::string codecs_line = boost::algorithm::join(codecs, ",");
auto ast = DB::parseQuery(codec_parser, "(" + codecs_line + ")", 0);
codec = DB::CompressionCodecFactory::instance().get(ast, nullptr);
}
else
codec = DB::CompressionCodecFactory::instance().get(method_family, levels.empty() ? std::nullopt : std::optional<int>(levels.back()));
codec = DB::CompressionCodecFactory::instance().get(method_family, level);
DB::ReadBufferFromFileDescriptor rb(STDIN_FILENO);

View File

@ -17,11 +17,11 @@ $ ./clickhouse-compressor --decompress < input_file > output_file
Compress data with ZSTD at level 5:
```
$ ./clickhouse-compressor --codec ZSTD --level 5 < input_file > output_file
$ ./clickhouse-compressor --codec 'ZSTD(5)' < input_file > output_file
```
Compress data with ZSTD level 5, LZ4HC level 7 and LZ4:
Compress data with the Delta codec (four-byte delta) followed by ZSTD at level 10:
```
$ ./clickhouse-compressor --codec ZSTD --level 5 --codec LZ4HC --level 7 --codec LZ4 < input_file > output_file
$ ./clickhouse-compressor --codec 'Delta(4)' --codec 'ZSTD(10)' < input_file > output_file
```