2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/CompressedReadBufferBase.h>
|
2016-10-25 06:49:24 +00:00
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
|
2017-07-28 14:14:07 +00:00
|
|
|
#include <string.h>
|
2016-10-25 06:49:24 +00:00
|
|
|
#include <city.h>
|
2017-02-28 23:49:04 +00:00
|
|
|
#include <zstd.h>
|
2016-10-25 06:49:24 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/PODArray.h>
|
|
|
|
#include <Common/ProfileEvents.h>
|
|
|
|
#include <Common/Exception.h>
|
2018-07-12 18:47:58 +00:00
|
|
|
#include <Common/hex.h>
|
2017-06-23 20:22:35 +00:00
|
|
|
#include <common/unaligned.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ReadBuffer.h>
|
|
|
|
#include <IO/BufferWithOwnMemory.h>
|
|
|
|
#include <IO/CompressedStream.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2016-10-25 06:49:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace ProfileEvents
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const Event ReadCompressedBytes;
|
|
|
|
extern const Event CompressedReadBufferBlocks;
|
|
|
|
extern const Event CompressedReadBufferBytes;
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes thrown from this file. Only declared here (extern);
/// the definitions live in another translation unit.
namespace ErrorCodes
{
    /// Raised while validating the block header / checksum.
    extern const int CHECKSUM_DOESNT_MATCH;
    extern const int TOO_LARGE_SIZE_COMPRESSED;
    extern const int UNKNOWN_COMPRESSION_METHOD;

    /// Raised when the codec itself reports a failure.
    extern const int CANNOT_DECOMPRESS;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need.
|
|
|
|
/// Returns number of compressed bytes read.
|
|
|
|
size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum)
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
if (compressed_in->eof())
|
|
|
|
return 0;
|
|
|
|
|
2017-06-21 08:35:38 +00:00
|
|
|
CityHash_v1_0_2::uint128 checksum;
|
2017-04-01 07:20:54 +00:00
|
|
|
compressed_in->readStrict(reinterpret_cast<char *>(&checksum), sizeof(checksum));
|
|
|
|
|
|
|
|
own_compressed_buffer.resize(COMPRESSED_BLOCK_HEADER_SIZE);
|
2018-09-02 03:00:04 +00:00
|
|
|
compressed_in->readStrict(own_compressed_buffer.data(), COMPRESSED_BLOCK_HEADER_SIZE);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
UInt8 method = own_compressed_buffer[0]; /// See CompressedWriteBuffer.h
|
|
|
|
|
|
|
|
size_t & size_compressed = size_compressed_without_checksum;
|
|
|
|
|
2017-08-01 08:12:15 +00:00
|
|
|
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) ||
|
2017-09-07 17:55:55 +00:00
|
|
|
method == static_cast<UInt8>(CompressionMethodByte::ZSTD) ||
|
|
|
|
method == static_cast<UInt8>(CompressionMethodByte::NONE))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
|
|
|
|
size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
|
|
|
|
|
|
|
if (size_compressed > DBMS_MAX_COMPRESSED_SIZE)
|
2018-07-13 03:40:47 +00:00
|
|
|
throw Exception("Too large size_compressed: " + toString(size_compressed) + ". Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed + sizeof(checksum));
|
|
|
|
|
|
|
|
/// Is whole compressed block located in 'compressed_in' buffer?
|
|
|
|
if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE &&
|
2018-01-16 01:59:51 +00:00
|
|
|
compressed_in->position() + size_compressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end())
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
compressed_in->position() -= COMPRESSED_BLOCK_HEADER_SIZE;
|
|
|
|
compressed_buffer = compressed_in->position();
|
|
|
|
compressed_in->position() += size_compressed;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2018-01-16 01:59:51 +00:00
|
|
|
own_compressed_buffer.resize(size_compressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
|
2018-09-02 03:00:04 +00:00
|
|
|
compressed_buffer = own_compressed_buffer.data();
|
2017-11-15 18:40:34 +00:00
|
|
|
compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2018-07-13 03:40:47 +00:00
|
|
|
if (!disable_checksum)
|
|
|
|
{
|
2018-07-12 18:47:58 +00:00
|
|
|
auto checksum_calculated = CityHash_v1_0_2::CityHash128(compressed_buffer, size_compressed);
|
|
|
|
if (checksum != checksum_calculated)
|
|
|
|
throw Exception("Checksum doesn't match: corrupted data."
|
2018-07-13 03:40:47 +00:00
|
|
|
" Reference: " + getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)
|
|
|
|
+ ". Actual: " + getHexUIntLowercase(checksum_calculated.first) + getHexUIntLowercase(checksum_calculated.second)
|
|
|
|
+ ". Size of compressed block: " + toString(size_compressed) + ".",
|
2018-07-12 18:47:58 +00:00
|
|
|
ErrorCodes::CHECKSUM_DOESNT_MATCH);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
return size_compressed + sizeof(checksum);
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks);
|
|
|
|
ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed);
|
|
|
|
|
|
|
|
UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h
|
|
|
|
|
2017-06-15 18:21:59 +00:00
|
|
|
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-01-16 01:59:51 +00:00
|
|
|
LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, lz4_stat);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
else if (method == static_cast<UInt8>(CompressionMethodByte::ZSTD))
|
|
|
|
{
|
|
|
|
size_t res = ZSTD_decompress(
|
|
|
|
to, size_decompressed,
|
2017-11-15 18:40:34 +00:00
|
|
|
compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed_without_checksum - COMPRESSED_BLOCK_HEADER_SIZE);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
if (ZSTD_isError(res))
|
|
|
|
throw Exception("Cannot ZSTD_decompress: " + std::string(ZSTD_getErrorName(res)), ErrorCodes::CANNOT_DECOMPRESS);
|
|
|
|
}
|
2017-07-31 17:42:23 +00:00
|
|
|
else if (method == static_cast<UInt8>(CompressionMethodByte::NONE))
|
2017-07-31 11:36:58 +00:00
|
|
|
{
|
|
|
|
memcpy(to, &compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], size_decompressed);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
else
|
|
|
|
throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
|
|
|
|
CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in)
|
2017-04-01 07:20:54 +00:00
|
|
|
: compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE)
|
2016-10-25 06:49:24 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Defined out of line (and defaulted here) for proper destruction of a unique_ptr
/// member of a forward-declared type.
CompressedReadBufferBase::~CompressedReadBufferBase() = default;
|
2016-10-25 06:49:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|