2013-04-24 10:31:32 +00:00
|
|
|
#pragma once
|
|
|
|
|
2019-06-18 12:54:27 +00:00
|
|
|
#include <Storages/MergeTree/IMergedBlockOutputStream.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Columns/ColumnArray.h>
|
2022-03-14 19:15:07 +00:00
|
|
|
#include <IO/WriteSettings.h>
|
2016-11-03 12:00:44 +00:00
|
|
|
|
2013-09-15 01:10:16 +00:00
|
|
|
|
2013-04-24 10:31:32 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2015-08-14 02:45:40 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** To write one part.
|
2017-08-14 18:16:11 +00:00
|
|
|
* The data refers to one partition, and is written in one part.
|
2014-03-04 11:30:50 +00:00
|
|
|
*/
|
2017-11-20 02:15:15 +00:00
|
|
|
class MergedBlockOutputStream final : public IMergedBlockOutputStream
|
2014-03-04 11:30:50 +00:00
|
|
|
{
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
MergedBlockOutputStream(
|
2020-02-04 13:34:57 +00:00
|
|
|
const MergeTreeDataPartPtr & data_part,
|
2020-06-16 15:51:29 +00:00
|
|
|
const StorageMetadataPtr & metadata_snapshot_,
|
2017-12-25 21:57:29 +00:00
|
|
|
const NamesAndTypesList & columns_list_,
|
2020-04-10 13:36:51 +00:00
|
|
|
const MergeTreeIndices & skip_indices,
|
2020-08-26 15:29:46 +00:00
|
|
|
CompressionCodecPtr default_codec_,
|
2022-02-14 19:50:08 +00:00
|
|
|
const MergeTreeTransactionPtr & txn,
|
2021-10-29 17:21:02 +00:00
|
|
|
bool reset_columns_ = false,
|
2022-03-21 11:30:25 +00:00
|
|
|
bool blocks_are_granules_size = false,
|
|
|
|
const WriteSettings & write_settings = {});
|
2014-06-26 00:58:14 +00:00
|
|
|
|
2021-09-29 17:45:01 +00:00
|
|
|
Block getHeader() const { return metadata_snapshot->getSampleBlock(); }
|
2018-02-19 00:45:32 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// If the data is pre-sorted.
|
2017-04-01 07:20:54 +00:00
|
|
|
void write(const Block & block) override;
|
2014-03-04 11:30:50 +00:00
|
|
|
|
2017-11-20 02:15:15 +00:00
|
|
|
/** If the data is not sorted, but we have previously calculated the permutation, that will sort it.
|
2017-04-16 15:00:33 +00:00
|
|
|
* This method is used to save RAM, since you do not need to keep two blocks at once - the original one and the sorted one.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void writeWithPermutation(const Block & block, const IColumn::Permutation * permutation);
|
2014-03-27 17:30:04 +00:00
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
/// Finalizer is a structure which is returned from by finalizePart().
|
|
|
|
/// Files from part may be written asynchronously, e.g. for blob storages.
|
|
|
|
/// You should call finish() to wait until all data is written.
|
|
|
|
struct Finalizer
|
|
|
|
{
|
|
|
|
struct Impl;
|
|
|
|
std::unique_ptr<Impl> impl;
|
|
|
|
|
|
|
|
explicit Finalizer(std::unique_ptr<Impl> impl_);
|
|
|
|
~Finalizer();
|
2022-02-17 15:56:42 +00:00
|
|
|
Finalizer(Finalizer &&) noexcept;
|
|
|
|
Finalizer & operator=(Finalizer &&) noexcept;
|
2022-02-01 10:36:51 +00:00
|
|
|
|
|
|
|
void finish();
|
|
|
|
};
|
|
|
|
|
2021-08-27 18:35:13 +00:00
|
|
|
/// Finalize writing part and fill inner structures
|
2021-08-26 11:01:15 +00:00
|
|
|
/// If part is new and contains projections, they should be added before invoking this method.
|
2022-02-01 10:36:51 +00:00
|
|
|
Finalizer finalizePartAsync(
|
|
|
|
MergeTreeData::MutableDataPartPtr & new_part,
|
|
|
|
bool sync,
|
|
|
|
const NamesAndTypesList * total_columns_list = nullptr,
|
2022-03-14 19:15:07 +00:00
|
|
|
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr,
|
2022-03-21 08:52:48 +00:00
|
|
|
const WriteSettings & settings = {});
|
2022-02-01 10:36:51 +00:00
|
|
|
|
|
|
|
void finalizePart(
|
2017-08-30 19:03:19 +00:00
|
|
|
MergeTreeData::MutableDataPartPtr & new_part,
|
2022-02-01 10:36:51 +00:00
|
|
|
bool sync,
|
2017-12-25 21:57:29 +00:00
|
|
|
const NamesAndTypesList * total_columns_list = nullptr,
|
2017-08-30 19:03:19 +00:00
|
|
|
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
|
2016-11-03 12:00:44 +00:00
|
|
|
|
2015-04-10 15:31:51 +00:00
|
|
|
private:
|
2017-05-09 19:07:35 +00:00
|
|
|
/** If `permutation` is given, it rearranges the values in the columns when writing.
|
2017-04-16 15:00:33 +00:00
|
|
|
* This is necessary to not keep the whole block in the RAM to sort it.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void writeImpl(const Block & block, const IColumn::Permutation * permutation);
|
2015-08-14 02:45:40 +00:00
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
using WrittenFiles = std::vector<std::unique_ptr<WriteBufferFromFileBase>>;
|
|
|
|
WrittenFiles finalizePartOnDisk(
|
2022-01-20 23:22:16 +00:00
|
|
|
const MergeTreeData::DataPartPtr & new_part,
|
2022-03-14 19:15:07 +00:00
|
|
|
MergeTreeData::DataPart::Checksums & checksums,
|
|
|
|
const WriteSettings & write_settings);
|
2020-04-14 01:26:34 +00:00
|
|
|
|
2017-12-25 21:57:29 +00:00
|
|
|
NamesAndTypesList columns_list;
|
2020-04-14 19:47:19 +00:00
|
|
|
IMergeTreeDataPart::MinMaxIndex minmax_idx;
|
2017-10-24 14:11:53 +00:00
|
|
|
size_t rows_count = 0;
|
2020-08-26 15:29:46 +00:00
|
|
|
CompressionCodecPtr default_codec;
|
2013-04-24 10:31:32 +00:00
|
|
|
};
|
|
|
|
|
2021-09-16 21:19:58 +00:00
|
|
|
/// Shared-ownership pointer type for MergedBlockOutputStream.
using MergedBlockOutputStreamPtr = std::shared_ptr<MergedBlockOutputStream>;
|
|
|
|
|
2013-09-26 19:16:43 +00:00
|
|
|
}
|