2020-04-14 19:47:19 +00:00
|
|
|
#include "MergeTreeDataPartInMemory.h"
|
|
|
|
#include <Storages/MergeTree/MergeTreeReaderInMemory.h>
|
2020-05-29 15:02:12 +00:00
|
|
|
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
2020-04-14 19:47:19 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeDataPartWriterInMemory.h>
|
|
|
|
#include <Storages/MergeTree/IMergeTreeReader.h>
|
2020-11-10 17:32:00 +00:00
|
|
|
#include <DataTypes/NestedUtils.h>
|
2020-06-03 13:27:54 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2020-05-29 15:02:12 +00:00
|
|
|
#include <Poco/Logger.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/logger_useful.h>
|
2020-04-14 19:47:19 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
{
    /// Used by flushToDisk() when a destination directory is already present on disk.
    extern const int DIRECTORY_ALREADY_EXISTS;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Constructs an in-memory part without an explicitly supplied part info.
/// All arguments are forwarded to IMergeTreeDataPart with the part type fixed to IN_MEMORY.
MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
    MergeTreeData & storage_,
    const String & name_,
    const VolumePtr & volume_,
    const std::optional<String> & relative_path_,
    const IMergeTreeDataPart * parent_part_)
    : IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::IN_MEMORY, parent_part_)
{
    /// The default codec of an in-memory part is always the "NONE" codec.
    auto && codec_factory = CompressionCodecFactory::instance();
    default_codec = codec_factory.get("NONE", {});
}
|
|
|
|
|
|
|
|
/// Constructs an in-memory part with an explicitly supplied part info.
/// All arguments are forwarded to IMergeTreeDataPart with the part type fixed to IN_MEMORY.
MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
    const MergeTreeData & storage_,
    const String & name_,
    const MergeTreePartInfo & info_,
    const VolumePtr & volume_,
    const std::optional<String> & relative_path_,
    const IMergeTreeDataPart * parent_part_)
    : IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::IN_MEMORY, parent_part_)
{
    /// The default codec of an in-memory part is always the "NONE" codec.
    auto && codec_factory = CompressionCodecFactory::instance();
    default_codec = codec_factory.get("NONE", {});
}
|
|
|
|
|
|
|
|
/// Creates a MergeTreeReaderInMemory for this part.
/// The uncompressed cache, mark cache, value size hints and profile callback are part of the
/// common interface but are not used by this implementation.
IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
    const NamesAndTypesList & columns_to_read,
    const StorageMetadataPtr & metadata_snapshot,
    const MarkRanges & mark_ranges,
    UncompressedCache * /* uncompressed_cache */,
    MarkCache * /* mark_cache */,
    const MergeTreeReaderSettings & reader_settings,
    const ValueSizeMap & /* avg_value_size_hints */,
    const ReadBufferFromFileBase::ProfileCallback & /* profile_callback */) const
{
    /// The reader is handed a shared pointer to this part, downcast to the in-memory part type.
    auto self = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());

    return std::make_unique<MergeTreeReaderInMemory>(
        self,
        columns_to_read,
        metadata_snapshot,
        mark_ranges,
        reader_settings);
}
|
|
|
|
|
|
|
|
/// Creates a MergeTreeDataPartWriterInMemory for this part.
/// The indices to recalculate, the default codec and the precomputed index granularity are part
/// of the common interface but are not used by this implementation.
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(
    const NamesAndTypesList & columns_list,
    const StorageMetadataPtr & metadata_snapshot,
    const std::vector<MergeTreeIndexPtr> & /* indices_to_recalc */,
    const CompressionCodecPtr & /* default_codec */,
    const MergeTreeWriterSettings & writer_settings,
    const MergeTreeIndexGranularity & /* computed_index_granularity */) const
{
    /// The writer is handed a shared pointer to this part, downcast to the in-memory part type.
    auto self = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());

    return std::make_unique<MergeTreeDataPartWriterInMemory>(
        self,
        columns_list,
        metadata_snapshot,
        writer_settings);
}
|
|
|
|
|
2020-06-26 11:30:23 +00:00
|
|
|
void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const
|
2020-05-29 15:02:12 +00:00
|
|
|
{
|
2020-06-03 13:27:54 +00:00
|
|
|
const auto & disk = volume->getDisk();
|
2020-05-29 16:58:08 +00:00
|
|
|
String destination_path = base_path + new_relative_path;
|
2020-05-29 15:02:12 +00:00
|
|
|
|
|
|
|
auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count);
|
2020-06-03 13:27:54 +00:00
|
|
|
auto new_data_part = storage.createPart(name, new_type, info, volume, new_relative_path);
|
2020-05-29 15:02:12 +00:00
|
|
|
|
2020-10-29 16:18:25 +00:00
|
|
|
new_data_part->uuid = uuid;
|
2020-05-29 15:02:12 +00:00
|
|
|
new_data_part->setColumns(columns);
|
2021-05-20 06:30:13 +00:00
|
|
|
new_data_part->partition.value = partition.value;
|
2020-05-29 15:02:12 +00:00
|
|
|
new_data_part->minmax_idx = minmax_idx;
|
|
|
|
|
|
|
|
if (disk->exists(destination_path))
|
|
|
|
{
|
2020-05-29 16:58:08 +00:00
|
|
|
throw Exception("Could not flush part " + quoteString(getFullPath())
|
|
|
|
+ ". Part in " + fullPath(disk, destination_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS);
|
2020-05-29 15:02:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
disk->createDirectories(destination_path);
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0);
|
2020-06-26 11:30:23 +00:00
|
|
|
auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices());
|
2021-10-29 17:21:02 +00:00
|
|
|
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec);
|
2020-05-29 15:02:12 +00:00
|
|
|
out.write(block);
|
2021-08-26 11:01:15 +00:00
|
|
|
const auto & projections = metadata_snapshot->getProjections();
|
|
|
|
for (const auto & [projection_name, projection] : projection_parts)
|
|
|
|
{
|
|
|
|
if (projections.has(projection_name))
|
|
|
|
{
|
|
|
|
String projection_destination_path = fs::path(destination_path) / projection_name / ".proj";
|
|
|
|
if (disk->exists(projection_destination_path))
|
|
|
|
{
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::DIRECTORY_ALREADY_EXISTS,
|
|
|
|
"Could not flush projection part {}. Projection part in {} already exists",
|
|
|
|
projection_name,
|
|
|
|
fullPath(disk, projection_destination_path));
|
|
|
|
}
|
|
|
|
|
|
|
|
auto projection_part = asInMemoryPart(projection);
|
|
|
|
auto projection_type = storage.choosePartTypeOnDisk(projection_part->block.bytes(), rows_count);
|
|
|
|
MergeTreePartInfo projection_info("all", 0, 0, 0);
|
|
|
|
auto projection_data_part
|
|
|
|
= storage.createPart(projection_name, projection_type, projection_info, volume, projection_name + ".proj", parent_part);
|
|
|
|
projection_data_part->is_temp = false; // clean up will be done on parent part
|
|
|
|
projection_data_part->setColumns(projection->getColumns());
|
|
|
|
|
|
|
|
disk->createDirectories(projection_destination_path);
|
|
|
|
const auto & desc = projections.get(name);
|
|
|
|
auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0);
|
|
|
|
auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices());
|
|
|
|
MergedBlockOutputStream projection_out(
|
2021-09-09 11:04:37 +00:00
|
|
|
projection_data_part, desc.metadata, projection_part->columns, projection_indices,
|
2021-10-29 17:21:02 +00:00
|
|
|
projection_compression_codec);
|
2021-10-14 16:44:08 +00:00
|
|
|
|
2021-08-26 11:01:15 +00:00
|
|
|
projection_out.write(projection_part->block);
|
|
|
|
projection_out.writeSuffixAndFinalizePart(projection_data_part);
|
|
|
|
new_data_part->addProjectionPart(projection_name, std::move(projection_data_part));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-29 15:02:12 +00:00
|
|
|
out.writeSuffixAndFinalizePart(new_data_part);
|
2020-05-29 16:58:08 +00:00
|
|
|
}
|
2020-05-29 15:02:12 +00:00
|
|
|
|
2020-06-26 11:30:23 +00:00
|
|
|
void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const
|
2020-05-29 16:58:08 +00:00
|
|
|
{
|
|
|
|
String detached_path = getRelativePathForDetachedPart(prefix);
|
2020-06-26 11:30:23 +00:00
|
|
|
flushToDisk(storage.getRelativeDataPath(), detached_path, metadata_snapshot);
|
2020-05-29 15:02:12 +00:00
|
|
|
}
|
|
|
|
|
2020-06-03 18:59:18 +00:00
|
|
|
void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */) const
|
2020-04-14 19:47:19 +00:00
|
|
|
{
|
2020-06-03 18:59:18 +00:00
|
|
|
relative_path = new_relative_path;
|
2020-04-14 19:47:19 +00:00
|
|
|
}
|
|
|
|
|
2020-06-29 20:36:18 +00:00
|
|
|
/// Accumulates uncompressed sizes for this part.
/// total_size is increased by the uncompressed size recorded in the checksum entry "data.bin", if there is one.
/// each_columns_size is increased, per column of the part, by the byte size of that column in the in-memory block.
void MergeTreeDataPartInMemory::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const
{
    const auto & checksum_files = checksums.files;
    if (auto data_file = checksum_files.find("data.bin"); data_file != checksum_files.end())
        total_size.data_uncompressed += data_file->second.uncompressed_size;

    for (const auto & name_and_type : columns)
    {
        const auto & column_in_block = block.getByName(name_and_type.name).column;
        each_columns_size[name_and_type.name].data_uncompressed += column_in_block->byteSize();
    }
}
|
|
|
|
|
2020-06-22 18:56:53 +00:00
|
|
|
/// Builds a checksum of the in-memory block: the uncompressed size is the byte size of the block,
/// the uncompressed hash is a 128-bit SipHash fed by every column of the block in order.
IMergeTreeDataPart::Checksum MergeTreeDataPartInMemory::calculateBlockChecksum() const
{
    IMergeTreeDataPart::Checksum result;
    result.uncompressed_size = block.bytes();

    SipHash hasher;
    for (const auto & block_column : block)
    {
        block_column.column->updateHashFast(hasher);
    }
    hasher.get128(result.uncompressed_hash);

    return result;
}
|
|
|
|
|
2020-06-05 20:47:46 +00:00
|
|
|
/// Downcasts a generic data part pointer to an in-memory part pointer.
/// The cast is dynamic, so the result is null when `part` is not a MergeTreeDataPartInMemory.
DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part)
{
    auto in_memory_part = std::dynamic_pointer_cast<const MergeTreeDataPartInMemory>(part);
    return in_memory_part;
}
|
2020-04-14 19:47:19 +00:00
|
|
|
}
|