2018-12-28 17:26:10 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-08-16 00:00:32 +00:00
|
|
|
#include <Compression/CompressedReadBufferBase.h>
|
2020-02-18 12:24:20 +00:00
|
|
|
#include <IO/ReadBufferFromFileBase.h>
|
2021-08-16 00:00:32 +00:00
|
|
|
#include <IO/ReadSettings.h>
|
2020-02-18 12:24:20 +00:00
|
|
|
#include <time.h>
|
|
|
|
#include <memory>
|
2018-12-28 17:26:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-03-28 19:24:28 +00:00
|
|
|
class MMappedFileCache;
|
2021-03-28 01:10:30 +00:00
|
|
|
|
2018-12-28 17:26:10 +00:00
|
|
|
|
|
|
|
/// Unlike CompressedReadBuffer, it supports seek.
|
|
|
|
/// A ReadBuffer with its own memory (BufferWithOwnMemory<ReadBuffer>) layered on top of a
/// ReadBufferFromFileBase (`file_in`) that holds the compressed data.
class CompressedReadBufferFromFile : public CompressedReadBufferBase, public BufferWithOwnMemory<ReadBuffer>
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
/** At any time, one of two things is true:
|
|
|
|
* a) size_compressed = 0
|
|
|
|
* b)
|
|
|
|
* - `working_buffer` contains the entire block.
|
|
|
|
* - `file_in` points to the end of this block.
|
|
|
|
* - `size_compressed` contains the compressed size of this block.
|
|
|
|
*/
|
|
|
|
/// Owning pointer to a file buffer.
/// NOTE(review): presumably set by the constructors and aliased by `file_in` - confirm in the .cpp.
std::unique_ptr<ReadBufferFromFileBase> p_file_in;
|
|
|
|
/// The underlying buffer over the compressed file (see the invariant above); setProfileCallback() forwards to it.
/// Members are initialized in declaration order, so `p_file_in` is initialized before this reference.
ReadBufferFromFileBase & file_in;
|
|
|
|
/// Compressed size of the block currently held in `working_buffer`, or 0 (case "a" of the invariant above).
size_t size_compressed = 0;
|
|
|
|
|
2021-10-05 09:11:25 +00:00
|
|
|
/// This field is inherited from ReadBuffer. It's used to perform "lazy" seek, so in seek() call we:
|
|
|
|
/// 1) actually seek only underlying compressed file_in to offset_in_compressed_file;
|
|
|
|
/// 2) reset current working_buffer;
|
|
|
|
/// 3) remember the position in decompressed block in nextimpl_working_buffer_offset.
|
|
|
|
/// After following ReadBuffer::next() -> nextImpl call we will read new data into working_buffer and
|
|
|
|
/// ReadBuffer::next() will move our position in the fresh working_buffer to nextimpl_working_buffer_offset and
|
|
|
|
/// reset it to zero.
|
|
|
|
///
|
|
|
|
/// NOTE: We have independent readBig implementation, so we have to take
|
|
|
|
/// nextimpl_working_buffer_offset into account there as well.
|
|
|
|
///
|
|
|
|
/* size_t nextimpl_working_buffer_offset; */
|
|
|
|
|
2018-12-28 17:26:10 +00:00
|
|
|
/// Called from ReadBuffer::next() (see the note above); reads new data into `working_buffer`.
bool nextImpl() override;
|
2021-07-26 00:34:36 +00:00
|
|
|
/// Overrides the base-class prefetch().
/// NOTE(review): presumably asks `file_in` to start reading ahead - confirm in the .cpp.
void prefetch() override;
|
2018-12-28 17:26:10 +00:00
|
|
|
|
|
|
|
|
public:
|
2020-09-04 12:48:55 +00:00
|
|
|
/// Takes ownership of an already-created file buffer `buf`.
/// NOTE(review): `allow_different_codecs_` is presumably passed on to CompressedReadBufferBase - confirm in the .cpp.
/// NOTE(review): not marked `explicit`, so a std::unique_ptr<ReadBufferFromFileBase> converts implicitly;
/// consider `explicit` if no caller relies on that.
CompressedReadBufferFromFile(std::unique_ptr<ReadBufferFromFileBase> buf, bool allow_different_codecs_ = false);
|
2020-02-14 14:28:33 +00:00
|
|
|
|
2018-12-28 17:26:10 +00:00
|
|
|
/// Constructs the buffer from a file `path`.
/// NOTE(review): presumably creates the file buffer itself using `settings` and `estimated_size` - confirm in the .cpp.
CompressedReadBufferFromFile(
|
2021-08-16 00:00:32 +00:00
|
|
|
const std::string & path, const ReadSettings & settings, size_t estimated_size, bool allow_different_codecs_ = false);
|
2018-12-28 17:26:10 +00:00
|
|
|
|
2021-10-05 09:11:25 +00:00
|
|
|
/// Seek is lazy in some sense. We move position in compressed file_in to offset_in_compressed_file, but don't
|
|
|
|
/// read data into working_buffer and don't shift our position to offset_in_decompressed_block. Instead
|
|
|
|
/// we store this offset inside nextimpl_working_buffer_offset.
|
2018-12-28 17:26:10 +00:00
|
|
|
void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block);
|
|
|
|
|
|
|
|
/// Overrides the base-class readBig(). Per the NOTE on nextimpl_working_buffer_offset above, this is an
/// independent implementation, so it has to take a pending lazy seek offset into account.
size_t readBig(char * to, size_t n) override;
|
|
|
|
|
|
|
|
/// Forwards the profiling callback and the clock type to `file_in`.
/// `clock_type_` defaults to CLOCK_MONOTONIC_COARSE.
void setProfileCallback(const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_ = CLOCK_MONOTONIC_COARSE)
|
|
|
|
{
|
|
|
|
file_in.setProfileCallback(profile_callback_, clock_type_);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|