2013-09-08 05:53:10 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
2015-03-31 12:23:22 +00:00
|
|
|
|
#include <DB/IO/createReadBufferFromFileBase.h>
|
2014-01-15 14:53:20 +00:00
|
|
|
|
#include <DB/IO/CompressedReadBufferBase.h>
|
2013-09-08 05:53:10 +00:00
|
|
|
|
#include <DB/IO/UncompressedCache.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/** Буфер для чтения из сжатого файла с использованием кэша разжатых блоков.
|
|
|
|
|
* Кэш внешний - передаётся в качестве аргумента в конструктор.
|
|
|
|
|
* Позволяет увеличить производительность в случае, когда часто читаются одни и те же блоки.
|
|
|
|
|
* Недостатки:
|
|
|
|
|
* - в случае, если нужно читать много данных подряд, но из них только часть закэширована, приходится делать seek-и.
|
|
|
|
|
*/
|
2014-01-15 14:53:20 +00:00
|
|
|
|
class CachedCompressedReadBuffer : public CompressedReadBufferBase, public ReadBuffer
|
2013-09-08 05:53:10 +00:00
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
const std::string path;
|
2013-11-26 11:55:11 +00:00
|
|
|
|
UncompressedCache * cache;
|
2013-09-08 05:53:10 +00:00
|
|
|
|
size_t buf_size;
|
2015-04-13 15:02:39 +00:00
|
|
|
|
size_t estimated_size;
|
2015-03-31 12:23:22 +00:00
|
|
|
|
size_t aio_threshold;
|
2013-09-08 05:53:10 +00:00
|
|
|
|
|
|
|
|
|
/// SharedPtr - для ленивой инициализации (только в случае кэш-промаха).
|
2015-03-31 12:23:22 +00:00
|
|
|
|
Poco::SharedPtr<ReadBufferFromFileBase> file_in;
|
2014-01-15 14:53:20 +00:00
|
|
|
|
size_t file_pos;
|
2013-09-08 05:53:10 +00:00
|
|
|
|
|
|
|
|
|
/// Кусок данных из кэша, или кусок считанных данных, который мы положим в кэш.
|
2014-02-11 11:05:02 +00:00
|
|
|
|
UncompressedCache::MappedPtr owned_cell;
|
2013-09-08 05:53:10 +00:00
|
|
|
|
|
2013-12-22 17:28:47 +00:00
|
|
|
|
void initInput()
|
|
|
|
|
{
|
2014-01-15 14:53:20 +00:00
|
|
|
|
if (!file_in)
|
2013-12-22 17:28:47 +00:00
|
|
|
|
{
|
2015-09-03 12:07:46 +00:00
|
|
|
|
file_in = createReadBufferFromFileBase(path, estimated_size, aio_threshold, buf_size);
|
2014-01-15 14:53:20 +00:00
|
|
|
|
compressed_in = &*file_in;
|
2013-12-22 17:28:47 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-08 05:53:10 +00:00
|
|
|
|
bool nextImpl()
|
|
|
|
|
{
|
2014-01-15 14:53:20 +00:00
|
|
|
|
/// Проверим наличие разжатого блока в кэше, захватим владение этим блоком, если он есть.
|
2013-12-22 21:36:54 +00:00
|
|
|
|
|
2014-02-11 11:05:02 +00:00
|
|
|
|
UInt128 key = cache->hash(path, file_pos);
|
2014-01-15 14:53:20 +00:00
|
|
|
|
owned_cell = cache->get(key);
|
2013-12-22 21:36:54 +00:00
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
if (!owned_cell)
|
|
|
|
|
{
|
|
|
|
|
/// Если нет - надо прочитать его из файла.
|
|
|
|
|
initInput();
|
2015-04-03 13:52:54 +00:00
|
|
|
|
file_in->seek(file_pos);
|
2013-12-22 21:36:54 +00:00
|
|
|
|
|
2014-02-11 11:05:02 +00:00
|
|
|
|
owned_cell.reset(new UncompressedCacheCell);
|
2013-12-22 21:36:54 +00:00
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
size_t size_decompressed;
|
2015-03-09 01:15:43 +00:00
|
|
|
|
size_t size_compressed_without_checksum;
|
|
|
|
|
owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum);
|
2013-12-22 21:36:54 +00:00
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
if (owned_cell->compressed_size)
|
|
|
|
|
{
|
|
|
|
|
owned_cell->data.resize(size_decompressed);
|
2015-03-09 01:15:43 +00:00
|
|
|
|
decompress(owned_cell->data.m_data, size_decompressed, size_compressed_without_checksum);
|
2013-12-22 21:36:54 +00:00
|
|
|
|
|
|
|
|
|
/// Положим данные в кэш.
|
2014-02-11 11:05:02 +00:00
|
|
|
|
cache->set(key, owned_cell);
|
2013-12-22 21:36:54 +00:00
|
|
|
|
}
|
2013-11-26 11:55:11 +00:00
|
|
|
|
}
|
2013-09-08 05:53:10 +00:00
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
if (owned_cell->data.m_size == 0)
|
|
|
|
|
{
|
2014-04-08 07:31:51 +00:00
|
|
|
|
owned_cell = nullptr;
|
2014-01-15 14:53:20 +00:00
|
|
|
|
return false;
|
2013-09-08 05:53:10 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
working_buffer = Buffer(owned_cell->data.m_data, owned_cell->data.m_data + owned_cell->data.m_size);
|
2013-12-23 05:56:59 +00:00
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
file_pos += owned_cell->compressed_size;
|
|
|
|
|
|
|
|
|
|
return true;
|
2013-12-23 05:56:59 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-08 05:53:10 +00:00
|
|
|
|
public:
|
2015-07-08 17:59:44 +00:00
|
|
|
|
CachedCompressedReadBuffer(
|
|
|
|
|
const std::string & path_, UncompressedCache * cache_, size_t estimated_size_, size_t aio_threshold_,
|
2015-09-03 12:07:46 +00:00
|
|
|
|
size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE)
|
|
|
|
|
: ReadBuffer(nullptr, 0), path(path_), cache(cache_), buf_size(buf_size_), estimated_size(estimated_size_),
|
|
|
|
|
aio_threshold(aio_threshold_), file_pos(0)
|
2013-11-26 11:55:11 +00:00
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-22 17:28:47 +00:00
|
|
|
|
|
2013-11-26 11:55:11 +00:00
|
|
|
|
void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block)
|
2013-09-08 05:53:10 +00:00
|
|
|
|
{
|
2014-01-15 14:53:20 +00:00
|
|
|
|
if (owned_cell &&
|
|
|
|
|
offset_in_compressed_file == file_pos - owned_cell->compressed_size &&
|
|
|
|
|
offset_in_decompressed_block <= working_buffer.size())
|
2013-11-26 11:55:11 +00:00
|
|
|
|
{
|
2014-01-15 14:53:20 +00:00
|
|
|
|
bytes += offset();
|
2013-11-26 11:55:11 +00:00
|
|
|
|
pos = working_buffer.begin() + offset_in_decompressed_block;
|
2014-01-15 14:53:20 +00:00
|
|
|
|
bytes -= offset();
|
2013-11-26 11:55:11 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2014-01-15 14:53:20 +00:00
|
|
|
|
file_pos = offset_in_compressed_file;
|
2013-12-23 04:16:59 +00:00
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
bytes += offset();
|
|
|
|
|
nextImpl();
|
2013-12-23 04:16:59 +00:00
|
|
|
|
|
2013-11-26 11:55:11 +00:00
|
|
|
|
if (offset_in_decompressed_block > working_buffer.size())
|
|
|
|
|
throw Exception("Seek position is beyond the decompressed block", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
2013-12-23 04:16:59 +00:00
|
|
|
|
|
2014-01-15 14:53:20 +00:00
|
|
|
|
pos = working_buffer.begin() + offset_in_decompressed_block;
|
|
|
|
|
bytes -= offset();
|
2013-11-26 11:55:11 +00:00
|
|
|
|
}
|
2013-09-08 05:53:10 +00:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|