A small preparation for better handling of primary key in memory

This commit is contained in:
Alexey Milovidov 2024-02-17 06:16:01 +01:00
parent 6b32622c7d
commit ac5bef7c74
7 changed files with 18 additions and 17 deletions

View File

@ -54,7 +54,7 @@ public:
Values getValue(size_t part_idx, size_t mark) const
{
const auto & index = parts[part_idx].data_part->index;
const auto & index = parts[part_idx].data_part->getIndex();
Values values(index.size());
for (size_t i = 0; i < values.size(); ++i)
{

View File

@ -313,13 +313,13 @@ IMergeTreeDataPart::IMergeTreeDataPart(
const IMergeTreeDataPart * parent_part_)
: DataPartStorageHolder(data_part_storage_)
, storage(storage_)
, mutable_name(name_)
, name(mutable_name)
, info(info_)
, index_granularity_info(storage_, part_type_)
, part_type(part_type_)
, parent_part(parent_part_)
, parent_part_name(parent_part ? parent_part->name : "")
, mutable_name(name_)
{
if (parent_part)
{

View File

@ -75,6 +75,7 @@ public:
using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
using NameToNumber = std::unordered_map<std::string, size_t>;
using Index = Columns;
using IndexSizeByName = std::unordered_map<std::string, ColumnSize>;
using Type = MergeTreeDataPartType;
@ -212,10 +213,6 @@ public:
const MergeTreeData & storage;
private:
String mutable_name;
mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary};
public:
const String & name; // const ref to private mutable_name
MergeTreePartInfo info;
@ -309,12 +306,6 @@ public:
/// Throws an exception if state of the part is not in affordable_states
void assertState(const std::initializer_list<MergeTreeDataPartState> & affordable_states) const;
/// Primary key (correspond to primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.
/// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h.
using Index = Columns;
Index index;
MergeTreePartition partition;
/// Amount of rows between marks
@ -369,6 +360,9 @@ public:
/// Version of part metadata (columns, pk and so on). Managed properly only for replicated merge tree.
int32_t metadata_version;
/// Read-only access to the primary key index columns held by this part ('index').
const Index & getIndex() const
{
    return this->index;
}
/// Replaces the primary key index columns held by this part ('index').
/// The argument is taken by value and moved into the member, so callers
/// may pass either a temporary or a copy of an existing set of columns.
void setIndex(Columns index_)
{
    this->index = std::move(index_);
}
/// For data in RAM ('index')
UInt64 getIndexSizeInBytes() const;
UInt64 getIndexSizeInAllocatedBytes() const;
@ -567,6 +561,10 @@ public:
mutable std::atomic<time_t> last_removal_attempt_time = 0;
protected:
/// Primary key (corresponds to the primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of the primary key tuple.
/// Note that marks (which also correspond to the primary key) are not always in RAM, but cached. See MarkCache.h.
Index index;
/// Total size of all columns, calculated once in calcuateColumnSizesOnDisk
ColumnSize total_columns_size;
@ -623,6 +621,9 @@ protected:
void initializeIndexGranularityInfo();
private:
String mutable_name;
mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary};
/// In compact parts order of columns is necessary
NameToNumber column_name_to_position;

View File

@ -6824,7 +6824,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
{
for (const auto & part : real_parts)
{
const auto & primary_key_column = *part->index[0];
const auto & primary_key_column = *part->getIndex()[0];
auto & min_column = assert_cast<ColumnAggregateFunction &>(*partition_minmax_count_columns[pos]);
insert(min_column, primary_key_column[0]);
}
@ -6835,7 +6835,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
{
for (const auto & part : real_parts)
{
const auto & primary_key_column = *part->index[0];
const auto & primary_key_column = *part->getIndex()[0];
auto & max_column = assert_cast<ColumnAggregateFunction &>(*partition_minmax_count_columns[pos]);
insert(max_column, primary_key_column[primary_key_column.size() - 1]);
}

View File

@ -1087,7 +1087,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
MarkRanges res;
size_t marks_count = part->index_granularity.getMarksCount();
const auto & index = part->index;
const auto & index = part->getIndex();
if (marks_count == 0)
return res;

View File

@ -181,7 +181,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
new_part->rows_count = rows_count;
new_part->modification_time = time(nullptr);
new_part->index = writer->releaseIndexColumns();
new_part->setIndex(writer->releaseIndexColumns());
new_part->checksums = checksums;
new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk());
new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk());

View File

@ -899,7 +899,7 @@ void finalizeMutatedPart(
new_data_part->rows_count = source_part->rows_count;
new_data_part->index_granularity = source_part->index_granularity;
new_data_part->index = source_part->index;
new_data_part->setIndex(source_part->getIndex());
new_data_part->minmax_idx = source_part->minmax_idx;
new_data_part->modification_time = time(nullptr);