2011-11-18 20:17:27 +00:00
|
|
|
|
#pragma once
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
2010-06-07 12:48:32 +00:00
|
|
|
|
#include <vector>
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
2011-06-24 20:18:09 +00:00
|
|
|
|
#include <city.h>
|
2011-06-17 21:19:39 +00:00
|
|
|
|
#include <quicklz/quicklz_level1.h>
|
2012-01-05 18:35:22 +00:00
|
|
|
|
#include <lz4/lz4.h>
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
2013-12-09 00:23:17 +00:00
|
|
|
|
#include <DB/Common/PODArray.h>
|
2014-01-03 08:20:13 +00:00
|
|
|
|
#include <DB/Common/ProfileEvents.h>
|
2010-06-04 18:25:25 +00:00
|
|
|
|
#include <DB/Core/Exception.h>
|
|
|
|
|
#include <DB/Core/ErrorCodes.h>
|
|
|
|
|
#include <DB/IO/ReadBuffer.h>
|
2011-06-27 18:22:14 +00:00
|
|
|
|
#include <DB/IO/BufferWithOwnMemory.h>
|
2011-06-17 21:19:39 +00:00
|
|
|
|
#include <DB/IO/CompressedStream.h>
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2011-06-27 18:22:14 +00:00
|
|
|
|
/** A ReadBuffer that reads compressed blocks from another ReadBuffer and exposes the decompressed data.
  *
  * As read by this class, every block in the source stream consists of:
  *  - a uint128 checksum (CityHash128 of the rest of the block);
  *  - a header of QUICKLZ_HEADER_SIZE bytes from which the compressed and decompressed sizes are extracted
  *    (the compressed size includes the header itself, because the checksum is calculated over
  *    size_compressed bytes starting at the header);
  *  - the compressed payload.
  * The highest bit of the first header byte selects the compression method: 0 - QuickLZ, 1 - LZ4.
  */
class CompressedReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
private:
	/// Source of compressed data.
	ReadBuffer & in;

	/// If a whole compressed block fits into the buffer of 'in', it is used in place.
	/// Otherwise the data is copied piece by piece into own_compressed_buffer.
	PODArray<char> own_compressed_buffer;

	/// Points to the beginning (header) of the current compressed block:
	/// either inside the buffer of 'in' or inside own_compressed_buffer.
	char * compressed_buffer;

	/// QuickLZ decompression state. Allocated on first use, released in the destructor.
	qlz_state_decompress * qlz_state;

	/** Pointer to the piece of memory where blocks are decompressed.
	  * It is either the own memory of BufferWithOwnMemory (by default),
	  *  or a piece of memory provided by the user (see setMemory).
	  */
	Memory * maybe_own_memory;


	/** Read one compressed block into compressed_buffer, extract the size of the decompressed data
	  *  from its header and verify the checksum.
	  *
	  * size_decompressed - output: the decompressed size stored in the block header.
	  * Returns false if 'in' is at end of stream (nothing is read in that case), true otherwise.
	  * Throws Exception if the compressed size from the header is out of the valid range
	  *  or if the checksum does not match.
	  */
	bool readCompressedData(size_t & size_decompressed)
	{
		if (in.eof())
			return false;

		uint128 checksum;
		in.readStrict(reinterpret_cast<char *>(&checksum), sizeof(checksum));

		/// The header always goes to own_compressed_buffer first: the sizes are needed
		///  before it can be decided where the rest of the block will live.
		in.readStrict(&own_compressed_buffer[0], QUICKLZ_HEADER_SIZE);

		size_t size_compressed = qlz_size_compressed(&own_compressed_buffer[0]);

		if (size_compressed > DBMS_MAX_COMPRESSED_SIZE)
			throw Exception("Too large size_compressed. Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);

		/// The header is not yet verified by the checksum at this point. A size smaller than the header
		///  would make 'size_compressed - QUICKLZ_HEADER_SIZE' below wrap around to a huge value.
		/// The same error code is reused: it is the one that denotes an invalid size_compressed.
		if (size_compressed < static_cast<size_t>(QUICKLZ_HEADER_SIZE))
			throw Exception("Too small size_compressed. Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);

		ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed + sizeof(checksum));

		size_decompressed = qlz_size_decompressed(&own_compressed_buffer[0]);

		/// Is the whole compressed block located inside the buffer of 'in'?
		if (in.offset() >= QUICKLZ_HEADER_SIZE && in.position() + size_compressed - QUICKLZ_HEADER_SIZE <= in.buffer().end())
		{
			/// Step back to the header, use the block in place and skip over it.
			in.position() -= QUICKLZ_HEADER_SIZE;
			compressed_buffer = in.position();
			in.position() += size_compressed;
		}
		else
		{
			/// The header is already at the beginning of own_compressed_buffer; read the rest after it.
			own_compressed_buffer.resize(size_compressed);
			compressed_buffer = &own_compressed_buffer[0];
			in.readStrict(&compressed_buffer[QUICKLZ_HEADER_SIZE], size_compressed - QUICKLZ_HEADER_SIZE);
		}

		if (checksum != CityHash128(&compressed_buffer[0], size_compressed))
			throw Exception("Checksum doesn't match: corrupted data.", ErrorCodes::CHECKSUM_DOESNT_MATCH);

		return true;
	}


	/** Decompress the block currently pointed to by compressed_buffer.
	  *
	  * to - destination; the caller provides at least size_decompressed bytes.
	  * size_decompressed - size of the decompressed data, as returned by readCompressedData.
	  */
	void decompress(char * to, size_t size_decompressed)
	{
		ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks);
		ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed);

		/// The highest bit of the first byte determines the compression method that was used.
		if ((compressed_buffer[0] & 0x80) == 0)
		{
			/// QuickLZ: the decompressor is given the block starting from its header.
			if (!qlz_state)
				qlz_state = new qlz_state_decompress;

			qlz_decompress(&compressed_buffer[0], to, qlz_state);
		}
		else
		{
			/// LZ4: the payload starts right after the header.
			LZ4_decompress_fast(&compressed_buffer[QUICKLZ_HEADER_SIZE], to, size_decompressed);
		}
	}


	/// Read and decompress the next block into the current memory (own or set by setMemory).
	/// Returns false if there is no more data.
	bool nextImpl()
	{
		size_t size_decompressed = 0;

		if (!readCompressedData(size_decompressed))
			return false;

		maybe_own_memory->resize(size_decompressed);
		internal_buffer = Buffer(&(*maybe_own_memory)[0], &(*maybe_own_memory)[size_decompressed]);
		working_buffer = Buffer(&(*maybe_own_memory)[0], &(*maybe_own_memory)[size_decompressed]);

		decompress(working_buffer.begin(), size_decompressed);

		return true;
	}

public:
	/// in_ - the buffer to read compressed data from. Only a reference to it is stored.
	CompressedReadBuffer(ReadBuffer & in_)
		: BufferWithOwnMemory<ReadBuffer>(0),
		in(in_),
		own_compressed_buffer(QUICKLZ_HEADER_SIZE),
		compressed_buffer(NULL),
		qlz_state(NULL),
		maybe_own_memory(&memory)
	{
	}


	~CompressedReadBuffer()
	{
		/// Deleting a null pointer is a no-op, so no check is needed.
		delete qlz_state;
	}


	/// Use a piece of memory provided by the user for decompression. (For implementation of a cache of decompressed blocks.)
	/// Only a pointer to memory_ is stored.
	void setMemory(Memory & memory_)
	{
		maybe_own_memory = &memory_;
	}


	/** Read up to n bytes into 'to', decompressing whole blocks directly into 'to' when possible.
	  * Returns the number of bytes actually read; it is less than n only if the source has ended.
	  */
	size_t readBig(char * to, size_t n)
	{
		size_t bytes_read = 0;

		/// If there are unread bytes in the buffer, copy as many as needed to 'to'.
		if (pos < working_buffer.end())
			bytes_read += read(to, std::min(static_cast<size_t>(working_buffer.end() - pos), n));

		/// If more data is needed, decompress directly into 'to' whenever possible.
		while (bytes_read < n)
		{
			size_t size_decompressed = 0;

			if (!readCompressedData(size_decompressed))
			{
				/// The consumed working buffer is being dropped: account for its bytes exactly once.
				bytes += offset();
				working_buffer.resize(0);
				pos = working_buffer.begin();
				return bytes_read;
			}

			/// Does the decompressed block fit entirely into the remaining part of 'to'?
			if (size_decompressed <= n - bytes_read)
			{
				/// The working buffer and pos are intentionally left untouched here, so the bytes
				///  of the working buffer must not be added to 'bytes' yet: it would be counted again
				///  when the working buffer is eventually replaced.
				decompress(to + bytes_read, size_decompressed);
				bytes_read += size_decompressed;
				bytes += size_decompressed;
			}
			else
			{
				/// The consumed working buffer is being replaced: account for its bytes exactly once.
				bytes += offset();

				maybe_own_memory->resize(size_decompressed);
				internal_buffer = Buffer(&(*maybe_own_memory)[0], &(*maybe_own_memory)[size_decompressed]);
				working_buffer = Buffer(&(*maybe_own_memory)[0], &(*maybe_own_memory)[size_decompressed]);
				pos = working_buffer.begin();

				decompress(working_buffer.begin(), size_decompressed);

				/// The rest of the request is served from the freshly decompressed block.
				bytes_read += read(to + bytes_read, n - bytes_read);
				break;
			}
		}

		return bytes_read;
	}
};
|
|
|
|
|
|
|
|
|
|
}
|