// ClickHouse/src/Storages/MergeTree/MergedBlockOutputStream.h
#pragma once
#include <Storages/MergeTree/IMergedBlockOutputStream.h>
#include <Columns/ColumnArray.h>
#include <IO/WriteSettings.h>
2013-09-15 01:10:16 +00:00
2013-04-24 10:31:32 +00:00
namespace DB
{
2017-04-16 15:00:33 +00:00
/** To write one part.
* The data refers to one partition, and is written in one part.
*/
class MergedBlockOutputStream final : public IMergedBlockOutputStream
{
public:
MergedBlockOutputStream(
2022-10-22 22:51:59 +00:00
const MergeTreeMutableDataPartPtr & data_part,
const StorageMetadataPtr & metadata_snapshot_,
const NamesAndTypesList & columns_list_,
const MergeTreeIndices & skip_indices,
CompressionCodecPtr default_codec_,
2022-02-14 19:50:08 +00:00
const MergeTreeTransactionPtr & txn,
2021-10-29 17:21:02 +00:00
bool reset_columns_ = false,
bool blocks_are_granules_size = false,
const WriteSettings & write_settings = {});
2021-09-29 17:45:01 +00:00
Block getHeader() const { return metadata_snapshot->getSampleBlock(); }
2017-04-16 15:00:33 +00:00
/// If the data is pre-sorted.
void write(const Block & block) override;
/** If the data is not sorted, but we have previously calculated the permutation, that will sort it.
2017-04-16 15:00:33 +00:00
* This method is used to save RAM, since you do not need to keep two blocks at once - the original one and the sorted one.
*/
void writeWithPermutation(const Block & block, const IColumn::Permutation * permutation);
2014-03-27 17:30:04 +00:00
/// Finalizer is a structure which is returned from by finalizePart().
/// Files from part may be written asynchronously, e.g. for blob storages.
/// You should call finish() to wait until all data is written.
struct Finalizer
{
struct Impl;
std::unique_ptr<Impl> impl;
explicit Finalizer(std::unique_ptr<Impl> impl_);
~Finalizer();
Finalizer(Finalizer &&) noexcept;
Finalizer & operator=(Finalizer &&) noexcept;
void finish();
};
/// Finalize writing part and fill inner structures
2021-08-26 11:01:15 +00:00
/// If part is new and contains projections, they should be added before invoking this method.
Finalizer finalizePartAsync(
const MergeTreeMutableDataPartPtr & new_part,
bool sync,
const NamesAndTypesList * total_columns_list = nullptr,
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
void finalizePart(
const MergeTreeMutableDataPartPtr & new_part,
bool sync,
const NamesAndTypesList * total_columns_list = nullptr,
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
private:
/** If `permutation` is given, it rearranges the values in the columns when writing.
2017-04-16 15:00:33 +00:00
* This is necessary to not keep the whole block in the RAM to sort it.
*/
void writeImpl(const Block & block, const IColumn::Permutation * permutation);
using WrittenFiles = std::vector<std::unique_ptr<WriteBufferFromFileBase>>;
WrittenFiles finalizePartOnDisk(
const MergeTreeMutableDataPartPtr & new_part,
MergeTreeData::DataPart::Checksums & checksums);
2020-04-14 01:26:34 +00:00
NamesAndTypesList columns_list;
2020-04-14 19:47:19 +00:00
IMergeTreeDataPart::MinMaxIndex minmax_idx;
size_t rows_count = 0;
CompressionCodecPtr default_codec;
2022-05-16 20:09:11 +00:00
WriteSettings write_settings;
2013-04-24 10:31:32 +00:00
};
/// Shared-ownership pointer to a MergedBlockOutputStream.
using MergedBlockOutputStreamPtr = std::shared_ptr<MergedBlockOutputStream>;
2013-09-26 19:16:43 +00:00
}