mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Fix multiple codecs and add test
This commit is contained in:
parent
c5d6acf5e3
commit
10ee24d9a0
@ -1,6 +1,3 @@
|
||||
/// For magic_enum to properly get enum name of DB::CompressionMethodByte
|
||||
#define MAGIC_ENUM_RANGE_MAX 256
|
||||
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <boost/program_options.hpp>
|
||||
@ -14,9 +11,12 @@
|
||||
#include <Compression/CompressedWriteBuffer.h>
|
||||
#include <Compression/CompressedReadBuffer.h>
|
||||
#include <Compression/CompressedReadBufferFromFile.h>
|
||||
#include <Compression/getCompressionCodecForFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <Compression/CompressionFactory.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
@ -41,31 +41,19 @@ void checkAndWriteHeader(DB::ReadBuffer & in, DB::WriteBuffer & out)
|
||||
{
|
||||
while (!in.eof())
|
||||
{
|
||||
in.ignore(16); /// checksum
|
||||
|
||||
char header[COMPRESSED_BLOCK_HEADER_SIZE];
|
||||
in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE);
|
||||
|
||||
UInt32 size_compressed = unalignedLoad<UInt32>(&header[1]);
|
||||
UInt32 size_compressed;
|
||||
UInt32 size_decompressed;
|
||||
auto codec = DB::getCompressionCodecForFile(in, size_compressed, size_decompressed, true /* skip_to_next_block */);
|
||||
|
||||
if (size_compressed > DBMS_MAX_COMPRESSED_SIZE)
|
||||
throw DB::Exception(DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED, "Too large size_compressed. Most likely corrupted data.");
|
||||
|
||||
UInt32 size_decompressed = unalignedLoad<UInt32>(&header[5]);
|
||||
|
||||
auto method_byte = static_cast<uint8_t>(header[0]);
|
||||
auto method = magic_enum::enum_cast<DB::CompressionMethodByte>(method_byte);
|
||||
if (method)
|
||||
DB::writeText(magic_enum::enum_name(*method), out);
|
||||
else
|
||||
DB::writeText(fmt::format("UNKNOWN({})", method_byte), out);
|
||||
DB::writeText(queryToString(codec->getFullCodecDesc()), out);
|
||||
DB::writeChar('\t', out);
|
||||
DB::writeText(size_decompressed, out);
|
||||
DB::writeChar('\t', out);
|
||||
DB::writeText(size_compressed, out);
|
||||
DB::writeChar('\n', out);
|
||||
|
||||
in.ignore(size_compressed - COMPRESSED_BLOCK_HEADER_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,33 +10,50 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
using Checksum = CityHash_v1_0_2::uint128;
|
||||
|
||||
CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path)
|
||||
CompressionCodecPtr
|
||||
getCompressionCodecForFile(ReadBuffer & read_buffer, UInt32 & size_compressed, UInt32 & size_decompressed, bool skip_to_next_block)
|
||||
{
|
||||
auto read_buffer = data_part_storage.readFile(relative_path, {}, std::nullopt, std::nullopt);
|
||||
read_buffer->ignore(sizeof(Checksum));
|
||||
read_buffer.ignore(sizeof(Checksum));
|
||||
|
||||
UInt8 header_size = ICompressionCodec::getHeaderSize();
|
||||
size_t starting_bytes = read_buffer.count();
|
||||
PODArray<char> compressed_buffer;
|
||||
compressed_buffer.resize(header_size);
|
||||
read_buffer->readStrict(compressed_buffer.data(), header_size);
|
||||
read_buffer.readStrict(compressed_buffer.data(), header_size);
|
||||
uint8_t method = ICompressionCodec::readMethod(compressed_buffer.data());
|
||||
size_compressed = unalignedLoad<UInt32>(&compressed_buffer[1]);
|
||||
size_decompressed = unalignedLoad<UInt32>(&compressed_buffer[5]);
|
||||
if (method == static_cast<uint8_t>(CompressionMethodByte::Multiple))
|
||||
{
|
||||
compressed_buffer.resize(1);
|
||||
read_buffer->readStrict(compressed_buffer.data(), 1);
|
||||
read_buffer.readStrict(compressed_buffer.data(), 1);
|
||||
compressed_buffer.resize(1 + compressed_buffer[0]);
|
||||
read_buffer->readStrict(compressed_buffer.data() + 1, compressed_buffer[0]);
|
||||
read_buffer.readStrict(compressed_buffer.data() + 1, compressed_buffer[0]);
|
||||
auto codecs_bytes = CompressionCodecMultiple::getCodecsBytesFromData(compressed_buffer.data());
|
||||
Codecs codecs;
|
||||
for (auto byte : codecs_bytes)
|
||||
codecs.push_back(CompressionCodecFactory::instance().get(byte));
|
||||
|
||||
if (skip_to_next_block)
|
||||
read_buffer.ignore(size_compressed - (read_buffer.count() - starting_bytes));
|
||||
|
||||
return std::make_shared<CompressionCodecMultiple>(codecs);
|
||||
}
|
||||
|
||||
if (skip_to_next_block)
|
||||
read_buffer.ignore(size_compressed - (read_buffer.count() - starting_bytes));
|
||||
|
||||
return CompressionCodecFactory::instance().get(method);
|
||||
}
|
||||
|
||||
CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path)
|
||||
{
|
||||
auto read_buffer = data_part_storage.readFile(relative_path, {}, std::nullopt, std::nullopt);
|
||||
UInt32 size_compressed;
|
||||
UInt32 size_decompressed;
|
||||
return getCompressionCodecForFile(*read_buffer, size_compressed, size_decompressed, false);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -13,4 +13,8 @@ namespace DB
|
||||
/// from metadata.
|
||||
CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path);
|
||||
|
||||
/// Same as above which is used by clickhouse-compressor to print compression statistics of each data block.
|
||||
CompressionCodecPtr
|
||||
getCompressionCodecForFile(ReadBuffer & read_buffer, UInt32 & size_compressed, UInt32 & size_decompressed, bool skip_to_next_block);
|
||||
|
||||
}
|
||||
|
@ -0,0 +1 @@
|
||||
CODEC(Delta(1), LZ4) 14 48
|
13
tests/queries/0_stateless/03260_compressor_stat.sh
Executable file
13
tests/queries/0_stateless/03260_compressor_stat.sh
Executable file
@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
echo "Hello, World!" > 03260_test_data
|
||||
|
||||
$CLICKHOUSE_COMPRESSOR --codec 'Delta' --codec 'LZ4' --input '03260_test_data' --output '03260_test_out'
|
||||
|
||||
$CLICKHOUSE_COMPRESSOR --stat '03260_test_out'
|
||||
|
||||
rm -f 03260_test_data 03260_test_out
|
Loading…
Reference in New Issue
Block a user