2019-06-18 12:54:27 +00:00
|
|
|
#include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
|
2020-04-14 01:26:34 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h>
|
2020-05-20 20:16:32 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2022-03-21 11:30:25 +00:00
|
|
|
#include <IO/WriteSettings.h>
|
2019-06-18 12:54:27 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2020-02-25 18:10:48 +00:00
|
|
|
namespace ErrorCodes
{
    /// Declared here, defined in another translation unit.
    /// Used in this file when the created part writer is not an on-disk writer.
    extern const int NOT_IMPLEMENTED;
}
|
2019-06-18 12:54:27 +00:00
|
|
|
|
|
|
|
/// Builds a stream that writes the columns described by header_ through a part writer
/// obtained from data_part.
///
/// The writer settings are assembled from the storage context settings, default
/// WriteSettings and the storage's own settings; the primary key is not rewritten.
/// Adaptive granularity is taken from index_granularity_info when it is provided,
/// otherwise the storage is asked via canUseAdaptiveGranularity().
///
/// Throws NOT_IMPLEMENTED if the writer returned by data_part is not a
/// MergeTreeDataPartWriterOnDisk.
MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream(
    const MergeTreeDataPartPtr & data_part,
    const StorageMetadataPtr & metadata_snapshot_,
    const Block & header_,
    CompressionCodecPtr default_codec,
    const MergeTreeIndices & indices_to_recalc,
    WrittenOffsetColumns * offset_columns_,
    const MergeTreeIndexGranularity & index_granularity,
    const MergeTreeIndexGranularityInfo * index_granularity_info)
    : IMergedBlockOutputStream(data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true)
    , header(header_)
{
    auto & part_storage = data_part->storage;

    const auto & global_settings = part_storage.getContext()->getSettings();
    const auto & storage_settings = part_storage.getSettings();

    /// An explicitly passed granularity info wins over what the storage reports.
    bool adaptive_granularity;
    if (index_granularity_info)
        adaptive_granularity = index_granularity_info->is_adaptive;
    else
        adaptive_granularity = part_storage.canUseAdaptiveGranularity();

    MergeTreeWriterSettings writer_settings(
        global_settings,
        WriteSettings{},
        storage_settings,
        adaptive_granularity,
        /* rewrite_primary_key = */false);

    writer = data_part->getWriter(
        header.getNamesAndTypesList(),
        metadata_snapshot_,
        indices_to_recalc,
        default_codec,
        std::move(writer_settings),
        index_granularity);

    /// Written offset columns can only be registered on an on-disk writer.
    if (auto * writer_on_disk = dynamic_cast<MergeTreeDataPartWriterOnDisk *>(writer.get()); writer_on_disk != nullptr)
        writer_on_disk->setWrittenOffsetColumns(offset_columns_);
    else
        throw Exception("MergedColumnOnlyOutputStream supports only parts stored on disk", ErrorCodes::NOT_IMPLEMENTED);
}
|
|
|
|
|
|
|
|
void MergedColumnOnlyOutputStream::write(const Block & block)
|
|
|
|
{
|
2020-04-14 01:26:34 +00:00
|
|
|
if (!block.rows())
|
2019-06-24 15:07:01 +00:00
|
|
|
return;
|
2019-06-24 13:44:44 +00:00
|
|
|
|
2020-12-10 08:57:52 +00:00
|
|
|
writer->write(block, nullptr);
|
2021-10-29 17:21:02 +00:00
|
|
|
new_serialization_infos.add(block);
|
2019-06-18 12:54:27 +00:00
|
|
|
}
|
|
|
|
|
2020-03-13 15:09:55 +00:00
|
|
|
/// Finishes column serialization and returns the checksums of what was written.
///
/// Steps, in order:
///  - the writer fills a fresh Checksums object;
///  - one "<projection name>.proj" entry is added per projection part of new_part;
///  - removeEmptyColumnsFromPart() is run over copies of the part's columns and
///    serialization infos (the latter updated from new_serialization_infos);
///  - every file name it returns is deleted from the part's disk (if it still exists)
///    and erased from all_checksums;
///  - the adjusted columns and serialization infos are stored back into new_part.
MergeTreeData::DataPart::Checksums
MergedColumnOnlyOutputStream::fillChecksums(
    MergeTreeData::MutableDataPartPtr & new_part,
    MergeTreeData::DataPart::Checksums & all_checksums)
{
    MergeTreeData::DataPart::Checksums checksums;
    writer->fillChecksums(checksums);

    for (const auto & [projection_name, projection_part] : new_part->getProjectionParts())
    {
        auto & projection_checksums = projection_part->checksums;
        checksums.addFile(
            projection_name + ".proj",
            projection_checksums.getTotalSizeOnDisk(),
            projection_checksums.getTotalChecksumUInt128());
    }

    auto part_columns = new_part->getColumns();
    auto part_serialization_infos = new_part->getSerializationInfos();
    part_serialization_infos.replaceData(new_serialization_infos);

    auto files_to_remove = removeEmptyColumnsFromPart(new_part, part_columns, part_serialization_infos, checksums);

    auto part_disk = new_part->volume->getDisk();
    for (const String & file_name : files_to_remove)
    {
        const auto path_on_disk = new_part->getFullRelativePath() + file_name;

        /// Can be called multiple times, so the file may already be gone.
        if (part_disk->exists(path_on_disk))
            part_disk->removeFile(path_on_disk);

        /// Erasing by key does nothing when the entry is absent.
        all_checksums.files.erase(file_name);
    }

    new_part->setColumns(part_columns);
    new_part->setSerializationInfos(part_serialization_infos);

    return checksums;
}
|
|
|
|
|
2022-02-08 08:01:26 +00:00
|
|
|
void MergedColumnOnlyOutputStream::finish(bool sync)
|
|
|
|
{
|
|
|
|
writer->finish(sync);
|
|
|
|
}
|
|
|
|
|
2019-06-18 12:54:27 +00:00
|
|
|
}
|