ClickHouse/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

184 lines
7.9 KiB
C++
Raw Normal View History

2020-04-14 19:47:19 +00:00
#include "MergeTreeDataPartInMemory.h"
#include <Storages/MergeTree/MergeTreeReaderInMemory.h>
2020-05-29 15:02:12 +00:00
#include <Storages/MergeTree/MergedBlockOutputStream.h>
2020-04-14 19:47:19 +00:00
#include <Storages/MergeTree/MergeTreeDataPartWriterInMemory.h>
#include <Storages/MergeTree/IMergeTreeReader.h>
2022-09-05 16:55:00 +00:00
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
2020-11-10 17:32:00 +00:00
#include <DataTypes/NestedUtils.h>
#include <Interpreters/Context.h>
2020-05-29 15:02:12 +00:00
#include <Poco/Logger.h>
2022-04-27 15:05:45 +00:00
#include <Common/logger_useful.h>
2020-04-14 19:47:19 +00:00
namespace DB
{
/// Error codes referenced in this translation unit; the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int DIRECTORY_ALREADY_EXISTS;
}
/// Constructs an in-memory part from its name only.
/// Forwards everything to the base class with part type `Type::InMemory`
/// and sets `default_codec` to the codec the factory returns for "NONE".
MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
    MergeTreeData & storage_,
    const String & name_,
    const DataPartStoragePtr & data_part_storage_,
    const IMergeTreeDataPart * parent_part_)
    : IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::InMemory, parent_part_)
{
    const auto & codec_factory = CompressionCodecFactory::instance();
    default_codec = codec_factory.get("NONE", {});
}
/// Constructs an in-memory part from its name and an explicit `MergeTreePartInfo`.
/// Forwards everything to the base class with part type `Type::InMemory`
/// and sets `default_codec` to the codec the factory returns for "NONE".
MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
    const MergeTreeData & storage_,
    const String & name_,
    const MergeTreePartInfo & info_,
    const DataPartStoragePtr & data_part_storage_,
    const IMergeTreeDataPart * parent_part_)
    : IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::InMemory, parent_part_)
{
    const auto & codec_factory = CompressionCodecFactory::instance();
    default_codec = codec_factory.get("NONE", {});
}
/// Creates a `MergeTreeReaderInMemory` for this part.
///
/// The caches, the average value size hints and the profile callback are accepted
/// to satisfy the common reader interface but are not used here.
IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
    const NamesAndTypesList & columns_to_read,
    const StorageMetadataPtr & metadata_snapshot,
    const MarkRanges & mark_ranges,
    UncompressedCache * /* uncompressed_cache */,
    MarkCache * /* mark_cache */,
    const MergeTreeReaderSettings & reader_settings,
    const ValueSizeMap & /* avg_value_size_hints */,
    const ReadBufferFromFileBase::ProfileCallback & /* profile_callback */) const
{
    /// Both the reader info wrapper and the reader itself share ownership of this part.
    auto self = shared_from_this();
    auto part_info_for_reader = std::make_shared<LoadedMergeTreeDataPartInfoForReader>(self);
    auto in_memory_part = std::static_pointer_cast<const MergeTreeDataPartInMemory>(self);

    return std::make_unique<MergeTreeReaderInMemory>(
        part_info_for_reader,
        in_memory_part,
        columns_to_read,
        metadata_snapshot,
        mark_ranges,
        reader_settings);
}
/// Creates a `MergeTreeDataPartWriterInMemory` for this part.
///
/// The passed storage builder is remembered in `data_part_storage_builder`
/// (it is used later by `flushToDisk` and `renameTo`). Indices to recalculate,
/// the codec and the precomputed index granularity are accepted for interface
/// compatibility and are not used here.
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(
    DataPartStorageBuilderPtr data_part_storage_builder_,
    const NamesAndTypesList & columns_list,
    const StorageMetadataPtr & metadata_snapshot,
    const std::vector<MergeTreeIndexPtr> & /* indices_to_recalc */,
    const CompressionCodecPtr & /* default_codec */,
    const MergeTreeWriterSettings & writer_settings,
    const MergeTreeIndexGranularity & /* computed_index_granularity */) const
{
    data_part_storage_builder = data_part_storage_builder_;

    auto in_memory_part = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());
    return std::make_unique<MergeTreeDataPartWriterInMemory>(
        in_memory_part,
        columns_list,
        metadata_snapshot,
        writer_settings);
}
2022-04-19 19:34:41 +00:00
/// Writes the in-memory block of this part, together with the blocks of its
/// projection parts, to disk under `new_relative_path`.
///
/// @param new_relative_path  Relative path the storage builder is repointed to before writing.
/// @param metadata_snapshot  Table metadata used for secondary indices and projection descriptions.
/// @return The storage object of the newly written part.
/// @throws Exception with DIRECTORY_ALREADY_EXISTS if the target directory of the part
///         or of any of its projections already exists.
///
/// Only projections that are still present in `metadata_snapshot` are flushed.
DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const
{
    /// Remember the current path for the error message: the builder is repointed right below.
    auto current_full_path = data_part_storage_builder->getFullPath();
    data_part_storage_builder->setRelativePath(new_relative_path);

    /// The storage must be obtained after the builder has been repointed to the new path.
    auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count);
    auto new_data_part_storage = data_part_storage_builder->getStorage();
    auto new_data_part = storage.createPart(name, new_type, info, new_data_part_storage);

    new_data_part->uuid = uuid;
    new_data_part->setColumns(columns, {});
    new_data_part->partition.value = partition.value;
    new_data_part->minmax_idx = minmax_idx;

    if (data_part_storage_builder->exists())
    {
        throw Exception(
            ErrorCodes::DIRECTORY_ALREADY_EXISTS,
            "Could not flush part {}. Part in {} already exists",
            quoteString(current_full_path),
            data_part_storage_builder->getFullPath());
    }

    data_part_storage_builder->createDirectories();

    auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0);
    auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices());
    MergedBlockOutputStream out(new_data_part, data_part_storage_builder, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR);
    out.write(block);

    const auto & projections = metadata_snapshot->getProjections();
    for (const auto & [projection_name, projection] : projection_parts)
    {
        /// Skip projections that are no longer described in the metadata snapshot.
        if (!projections.has(projection_name))
            continue;

        auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj");
        if (projection_part_storage_builder->exists())
        {
            throw Exception(
                ErrorCodes::DIRECTORY_ALREADY_EXISTS,
                "Could not flush projection part {}. Projection part in {} already exists",
                projection_name,
                projection_part_storage_builder->getFullPath());
        }

        auto projection_part = asInMemoryPart(projection);
        /// NOTE(review): the row count passed here is the parent's `rows_count`, not the
        /// projection part's own - confirm this is intended.
        auto projection_type = storage.choosePartTypeOnDisk(projection_part->block.bytes(), rows_count);
        MergeTreePartInfo projection_info("all", 0, 0, 0);
        /// NOTE(review): `parent_part` is this in-memory part's own parent pointer rather than
        /// `new_data_part` - confirm this is intended.
        auto projection_data_part
            = storage.createPart(projection_name, projection_type, projection_info, projection_part_storage_builder->getStorage(), parent_part);
        projection_data_part->is_temp = false; // clean up will be done on parent part
        projection_data_part->setColumns(projection->getColumns(), {});

        projection_part_storage_builder->createDirectories();

        /// Look the description up by the projection's name. The part's own `name`
        /// identifies the data part, not an entry in the projections list.
        const auto & desc = projections.get(projection_name);
        auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0);
        auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices());
        MergedBlockOutputStream projection_out(
            projection_data_part, projection_part_storage_builder, desc.metadata, projection_part->columns, projection_indices,
            projection_compression_codec, NO_TRANSACTION_PTR);

        projection_out.write(projection_part->block);
        projection_out.finalizePart(projection_data_part, false);
        new_data_part->addProjectionPart(projection_name, std::move(projection_data_part));
    }

    /// The parent part is finalized only after all of its projections have been written.
    out.finalizePart(new_data_part, false);
    return new_data_part_storage;
}
2020-05-29 15:02:12 +00:00
2020-06-26 11:30:23 +00:00
void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const
2020-05-29 16:58:08 +00:00
{
String detached_path = getRelativePathForDetachedPart(prefix);
2022-04-19 19:34:41 +00:00
flushToDisk(detached_path, metadata_snapshot);
2020-05-29 15:02:12 +00:00
}
2022-06-28 10:51:49 +00:00
void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */, DataPartStorageBuilderPtr) const
2020-04-14 19:47:19 +00:00
{
2022-04-19 19:34:41 +00:00
data_part_storage->setRelativePath(new_relative_path);
if (data_part_storage_builder)
data_part_storage_builder->setRelativePath(new_relative_path);
2020-04-14 19:47:19 +00:00
}
2020-06-29 20:36:18 +00:00
/// Accumulates uncompressed sizes for this part.
///
/// `total_size` is increased by the uncompressed size recorded in the checksums for
/// "data.bin", if such an entry exists. For every column of the part, the byte size of
/// the corresponding in-memory column is added to `each_columns_size`.
void MergeTreeDataPartInMemory::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const
{
    if (auto data_file = checksums.files.find("data.bin"); data_file != checksums.files.end())
        total_size.data_uncompressed += data_file->second.uncompressed_size;

    for (const auto & column : columns)
    {
        const auto & column_in_block = block.getByName(column.name);
        each_columns_size[column.name].data_uncompressed += column_in_block.column->byteSize();
    }
}
/// Computes a checksum of the in-memory block: a 128-bit SipHash over all of its
/// columns plus the block size in bytes as the uncompressed size.
IMergeTreeDataPart::Checksum MergeTreeDataPartInMemory::calculateBlockChecksum() const
{
    SipHash block_hash;
    IMergeTreeDataPart::Checksum result;

    /// Columns are fed to the hash in block order.
    for (const auto & entry : block)
    {
        entry.column->updateHashFast(block_hash);
    }

    result.uncompressed_size = block.bytes();
    block_hash.get128(result.uncompressed_hash);
    return result;
}
2020-06-05 20:47:46 +00:00
/// Downcasts a generic data part pointer to an in-memory part pointer.
/// The result is null when `part` is not a `MergeTreeDataPartInMemory`.
DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part)
{
    auto in_memory_part = std::dynamic_pointer_cast<const MergeTreeDataPartInMemory>(part);
    return in_memory_part;
}
2020-04-14 19:47:19 +00:00
}