2013-04-24 10:31:32 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
2013-09-15 01:10:16 +00:00
|
|
|
|
#include <DB/IO/WriteBufferFromFile.h>
|
|
|
|
|
#include <DB/IO/CompressedWriteBuffer.h>
|
2014-03-27 17:30:04 +00:00
|
|
|
|
#include <DB/IO/HashingWriteBuffer.h>
|
2014-03-09 17:36:01 +00:00
|
|
|
|
#include <DB/Storages/MergeTree/MergeTreeData.h>
|
2015-04-16 06:12:35 +00:00
|
|
|
|
#include <DB/DataStreams/IBlockOutputStream.h>
|
2013-04-24 10:31:32 +00:00
|
|
|
|
|
2016-11-03 12:00:44 +00:00
|
|
|
|
#include <DB/Columns/ColumnArray.h>
|
|
|
|
|
|
2013-09-15 01:10:16 +00:00
|
|
|
|
|
2013-04-24 10:31:32 +00:00
|
|
|
|
namespace DB
|
|
|
|
|
{
|
2015-08-14 02:45:40 +00:00
|
|
|
|
|
|
|
|
|
|
2014-03-04 11:30:50 +00:00
|
|
|
|
class IMergedBlockOutputStream : public IBlockOutputStream
|
2013-04-24 10:31:32 +00:00
|
|
|
|
{
|
|
|
|
|
public:
|
2015-03-14 02:36:39 +00:00
|
|
|
|
IMergedBlockOutputStream(
|
|
|
|
|
MergeTreeData & storage_,
|
|
|
|
|
size_t min_compress_block_size_,
|
|
|
|
|
size_t max_compress_block_size_,
|
2015-04-10 15:31:51 +00:00
|
|
|
|
CompressionMethod compression_method_,
|
2016-07-21 16:22:24 +00:00
|
|
|
|
size_t aio_threshold_);
|
2013-08-24 08:01:19 +00:00
|
|
|
|
|
2014-03-04 11:30:50 +00:00
|
|
|
|
protected:
|
2015-03-14 02:36:39 +00:00
|
|
|
|
using OffsetColumns = std::set<std::string>;
|
|
|
|
|
|
2013-04-24 10:31:32 +00:00
|
|
|
|
struct ColumnStream
|
|
|
|
|
{
|
2015-03-14 02:36:39 +00:00
|
|
|
|
ColumnStream(
|
|
|
|
|
const String & escaped_column_name_,
|
|
|
|
|
const String & data_path,
|
2016-07-21 16:22:24 +00:00
|
|
|
|
const std::string & data_file_extension_,
|
2015-03-14 02:36:39 +00:00
|
|
|
|
const std::string & marks_path,
|
2016-07-21 16:22:24 +00:00
|
|
|
|
const std::string & marks_file_extension_,
|
2015-03-14 02:36:39 +00:00
|
|
|
|
size_t max_compress_block_size,
|
2015-04-08 16:48:47 +00:00
|
|
|
|
CompressionMethod compression_method,
|
|
|
|
|
size_t estimated_size,
|
2016-07-21 16:22:24 +00:00
|
|
|
|
size_t aio_threshold);
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
2014-03-27 17:30:04 +00:00
|
|
|
|
String escaped_column_name;
|
2016-07-21 16:22:24 +00:00
|
|
|
|
std::string data_file_extension;
|
|
|
|
|
std::string marks_file_extension;
|
2014-04-14 13:08:26 +00:00
|
|
|
|
|
|
|
|
|
/// compressed -> compressed_buf -> plain_hashing -> plain_file
|
2015-04-08 16:48:47 +00:00
|
|
|
|
std::unique_ptr<WriteBufferFromFileBase> plain_file;
|
2014-04-14 13:08:26 +00:00
|
|
|
|
HashingWriteBuffer plain_hashing;
|
2014-03-27 17:30:04 +00:00
|
|
|
|
CompressedWriteBuffer compressed_buf;
|
2014-04-14 13:13:20 +00:00
|
|
|
|
HashingWriteBuffer compressed;
|
2014-04-14 13:08:26 +00:00
|
|
|
|
|
|
|
|
|
/// marks -> marks_file
|
|
|
|
|
WriteBufferFromFile marks_file;
|
2014-03-27 17:30:04 +00:00
|
|
|
|
HashingWriteBuffer marks;
|
2013-09-15 01:10:16 +00:00
|
|
|
|
|
2016-07-21 16:22:24 +00:00
|
|
|
|
void finalize();
|
|
|
|
|
|
|
|
|
|
void sync();
|
|
|
|
|
|
|
|
|
|
void addToChecksums(MergeTreeData::DataPart::Checksums & checksums, String name = "");
|
2013-04-24 10:31:32 +00:00
|
|
|
|
};
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
2015-03-14 02:36:39 +00:00
|
|
|
|
using ColumnStreams = std::map<String, std::unique_ptr<ColumnStream>>;
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
2016-11-03 12:00:44 +00:00
|
|
|
|
void addStream(const String & path, const String & name, const IDataType & type, size_t estimated_size = 0,
|
2016-12-10 04:51:36 +00:00
|
|
|
|
size_t level = 0, const String & filename = "", bool skip_offsets = false);
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
2013-04-24 10:31:32 +00:00
|
|
|
|
/// Записать данные одного столбца.
|
2016-11-03 12:00:44 +00:00
|
|
|
|
void writeData(const String & name, const IDataType & type, const IColumn & column, OffsetColumns & offset_columns,
|
2016-12-10 04:51:36 +00:00
|
|
|
|
size_t level = 0, bool skip_offsets = false);
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
2014-03-09 17:36:01 +00:00
|
|
|
|
MergeTreeData & storage;
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
|
|
|
|
ColumnStreams column_streams;
|
|
|
|
|
|
|
|
|
|
/// Смещение до первой строчки блока, для которой надо записать индекс.
|
2015-03-14 02:36:39 +00:00
|
|
|
|
size_t index_offset = 0;
|
2014-04-08 15:29:12 +00:00
|
|
|
|
|
|
|
|
|
size_t min_compress_block_size;
|
|
|
|
|
size_t max_compress_block_size;
|
2015-03-14 02:36:39 +00:00
|
|
|
|
|
2015-04-08 16:48:47 +00:00
|
|
|
|
size_t aio_threshold;
|
|
|
|
|
|
2015-03-14 02:36:39 +00:00
|
|
|
|
CompressionMethod compression_method;
|
2016-10-20 10:13:07 +00:00
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
/// Internal version of writeData.
|
|
|
|
|
void writeDataImpl(const String & name, const IDataType & type, const IColumn & column,
|
2016-12-10 04:51:36 +00:00
|
|
|
|
OffsetColumns & offset_columns, size_t level, bool write_array_data, bool skip_offsets);
|
2014-03-04 11:30:50 +00:00
|
|
|
|
};
|
|
|
|
|
|
2015-08-14 02:45:40 +00:00
|
|
|
|
|
|
|
|
|
/** Для записи одного куска.
|
|
|
|
|
* Данные относятся к одному месяцу, и пишутся в один кускок.
|
2014-03-04 11:30:50 +00:00
|
|
|
|
*/
|
|
|
|
|
class MergedBlockOutputStream : public IMergedBlockOutputStream
|
|
|
|
|
{
|
|
|
|
|
public:
|
2015-03-14 02:36:39 +00:00
|
|
|
|
MergedBlockOutputStream(
|
|
|
|
|
MergeTreeData & storage_,
|
|
|
|
|
String part_path_,
|
|
|
|
|
const NamesAndTypesList & columns_list_,
|
2016-07-21 16:22:24 +00:00
|
|
|
|
CompressionMethod compression_method);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-04-10 15:31:51 +00:00
|
|
|
|
MergedBlockOutputStream(
|
|
|
|
|
MergeTreeData & storage_,
|
|
|
|
|
String part_path_,
|
|
|
|
|
const NamesAndTypesList & columns_list_,
|
|
|
|
|
CompressionMethod compression_method,
|
2015-04-10 17:09:16 +00:00
|
|
|
|
const MergeTreeData::DataPart::ColumnToSize & merged_column_to_size_,
|
2016-07-21 16:22:24 +00:00
|
|
|
|
size_t aio_threshold_);
|
|
|
|
|
|
|
|
|
|
std::string getPartPath() const;
|
2016-01-28 16:06:57 +00:00
|
|
|
|
|
2015-08-14 02:45:40 +00:00
|
|
|
|
/// Если данные заранее отсортированы.
|
2016-07-21 16:22:24 +00:00
|
|
|
|
void write(const Block & block) override;
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
2015-08-14 02:45:40 +00:00
|
|
|
|
/** Если данные не отсортированы, но мы заранее вычислили перестановку, после которой они станут сортированными.
|
|
|
|
|
* Этот метод используется для экономии оперативки, так как не нужно держать одновременно два блока - исходный и отсортированный.
|
|
|
|
|
*/
|
2016-07-21 16:22:24 +00:00
|
|
|
|
void writeWithPermutation(const Block & block, const IColumn::Permutation * permutation);
|
2014-03-27 17:30:04 +00:00
|
|
|
|
|
2016-07-21 16:22:24 +00:00
|
|
|
|
void writeSuffix() override;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2016-11-03 12:00:44 +00:00
|
|
|
|
MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums(
|
|
|
|
|
const NamesAndTypesList & total_column_list,
|
2016-12-10 04:51:36 +00:00
|
|
|
|
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
|
2016-11-03 12:00:44 +00:00
|
|
|
|
|
2016-07-21 16:22:24 +00:00
|
|
|
|
MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums();
|
|
|
|
|
|
|
|
|
|
MergeTreeData::DataPart::Index & getIndex();
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2014-03-04 11:30:50 +00:00
|
|
|
|
/// Сколько засечек уже записано.
|
2016-07-21 16:22:24 +00:00
|
|
|
|
size_t marksCount();
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
2015-04-10 15:31:51 +00:00
|
|
|
|
private:
|
2016-07-21 16:22:24 +00:00
|
|
|
|
void init();
|
2015-04-10 15:31:51 +00:00
|
|
|
|
|
2015-08-14 02:45:40 +00:00
|
|
|
|
/** Если задана permutation, то переставляет значения в столбцах при записи.
|
|
|
|
|
* Это нужно, чтобы не держать целый блок в оперативке для его сортировки.
|
|
|
|
|
*/
|
2016-07-21 16:22:24 +00:00
|
|
|
|
void writeImpl(const Block & block, const IColumn::Permutation * permutation);
|
2015-08-14 02:45:40 +00:00
|
|
|
|
|
2014-03-04 11:30:50 +00:00
|
|
|
|
private:
|
2014-03-13 12:48:07 +00:00
|
|
|
|
NamesAndTypesList columns_list;
|
2014-03-27 12:32:37 +00:00
|
|
|
|
String part_path;
|
2014-03-13 12:48:07 +00:00
|
|
|
|
|
2015-03-14 02:36:39 +00:00
|
|
|
|
size_t marks_count = 0;
|
2014-03-27 12:32:37 +00:00
|
|
|
|
|
2016-05-28 14:14:18 +00:00
|
|
|
|
std::unique_ptr<WriteBufferFromFile> index_file_stream;
|
|
|
|
|
std::unique_ptr<HashingWriteBuffer> index_stream;
|
2016-02-14 05:43:03 +00:00
|
|
|
|
MergeTreeData::DataPart::Index index_columns;
|
2013-04-24 10:31:32 +00:00
|
|
|
|
};
|
|
|
|
|
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
|
|
|
|
/// Записывает только те, столбцы, что лежат в block
|
|
|
|
|
class MergedColumnOnlyOutputStream : public IMergedBlockOutputStream
|
|
|
|
|
{
|
|
|
|
|
public:
|
2016-12-10 04:51:36 +00:00
|
|
|
|
MergedColumnOnlyOutputStream(
|
|
|
|
|
MergeTreeData & storage_, String part_path_, bool sync_, CompressionMethod compression_method, bool skip_offsets_ = false);
|
2014-03-27 17:30:04 +00:00
|
|
|
|
|
2016-07-21 16:22:24 +00:00
|
|
|
|
void write(const Block & block) override;
|
|
|
|
|
void writeSuffix() override;
|
|
|
|
|
MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums();
|
2014-03-04 11:30:50 +00:00
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
String part_path;
|
|
|
|
|
|
2015-03-14 02:36:39 +00:00
|
|
|
|
bool initialized = false;
|
2014-03-05 16:28:24 +00:00
|
|
|
|
bool sync;
|
2016-11-03 12:00:44 +00:00
|
|
|
|
bool skip_offsets;
|
2014-03-04 11:30:50 +00:00
|
|
|
|
};
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-09-26 19:16:43 +00:00
|
|
|
|
}
|