fix index granularity

This commit is contained in:
Anton Popov 2024-11-12 16:42:49 +00:00
parent c5817d528c
commit 3bba64e975
9 changed files with 59 additions and 58 deletions

View File

@ -41,6 +41,11 @@ size_t MergeTreeIndexGranularity::getMarksCountWithoutFinal() const
return total - hasFinalMark(); return total - hasFinalMark();
} }
size_t MergeTreeIndexGranularity::getMarkStartingRow(size_t mark_index) const
{
return getRowsCountInRange(0, mark_index);
}
size_t MergeTreeIndexGranularity::getLastMarkRows() const size_t MergeTreeIndexGranularity::getLastMarkRows() const
{ {
return getMarkRows(getMarksCount() - 1); return getMarkRows(getMarksCount() - 1);
@ -48,7 +53,7 @@ size_t MergeTreeIndexGranularity::getLastMarkRows() const
size_t MergeTreeIndexGranularity::getLastNonFinalMarkRows() const size_t MergeTreeIndexGranularity::getLastNonFinalMarkRows() const
{ {
size_t last_mark_rows = getLastMarkRows(); size_t last_mark_rows = getMarkRows(getMarksCount() - 1);
if (last_mark_rows != 0) if (last_mark_rows != 0)
return last_mark_rows; return last_mark_rows;
return getMarkRows(getMarksCount() - 2); return getMarkRows(getMarksCount() - 2);
@ -140,5 +145,4 @@ MergeTreeIndexGranularityPtr createMergeTreeIndexGranularity(
return std::make_shared<MergeTreeIndexGranularityConstant>(computed_granularity); return std::make_shared<MergeTreeIndexGranularityConstant>(computed_granularity);
} }
} }

View File

@ -5,12 +5,7 @@
namespace DB namespace DB
{ {
/// Class contains information about index granularity in rows of IMergeTreeDataPart /// Class that contains information about index granularity in rows of IMergeTreeDataPart
/// Inside it contains vector of partial sums of rows after mark:
/// |-----|---|----|----|
/// | 5 | 8 | 12 | 16 |
/// If user doesn't specify setting index_granularity_bytes for MergeTree* table
/// all values in inner vector would have constant stride (default 8192).
class MergeTreeIndexGranularity class MergeTreeIndexGranularity
{ {
public: public:
@ -49,7 +44,7 @@ public:
virtual size_t getMarkRows(size_t mark_index) const = 0; virtual size_t getMarkRows(size_t mark_index) const = 0;
/// Return amount of rows before mark /// Return amount of rows before mark
virtual size_t getMarkStartingRow(size_t mark_index) const = 0; size_t getMarkStartingRow(size_t mark_index) const;
/// Amount of rows after last mark /// Amount of rows after last mark
size_t getLastMarkRows() const; size_t getLastMarkRows() const;

View File

@ -27,22 +27,11 @@ size_t MergeTreeIndexGranularityAdaptive::getMarkRows(size_t mark_index) const
return marks_rows_partial_sums[mark_index] - marks_rows_partial_sums[mark_index - 1]; return marks_rows_partial_sums[mark_index] - marks_rows_partial_sums[mark_index - 1];
} }
size_t MergeTreeIndexGranularityAdaptive::getMarkStartingRow(size_t mark_index) const
{
if (mark_index > getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get non existing mark {}, while size is {}", mark_index, getMarksCount());
if (mark_index == 0)
return 0;
return marks_rows_partial_sums[mark_index - 1];
}
bool MergeTreeIndexGranularityAdaptive::hasFinalMark() const bool MergeTreeIndexGranularityAdaptive::hasFinalMark() const
{ {
if (marks_rows_partial_sums.empty()) if (marks_rows_partial_sums.empty())
return false; return false;
return marks_rows_partial_sums.back() == 0; return getLastMarkRows() == 0;
} }
size_t MergeTreeIndexGranularityAdaptive::getMarksCount() const size_t MergeTreeIndexGranularityAdaptive::getMarksCount() const
@ -92,9 +81,16 @@ void MergeTreeIndexGranularityAdaptive::adjustLastMark(size_t rows_count)
size_t MergeTreeIndexGranularityAdaptive::getRowsCountInRange(size_t begin, size_t end) const size_t MergeTreeIndexGranularityAdaptive::getRowsCountInRange(size_t begin, size_t end) const
{ {
if (end > getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get marks in range [{}; {}), while size is {}", begin, end, getMarksCount());
if (end == 0)
return 0;
size_t subtrahend = 0; size_t subtrahend = 0;
if (begin != 0) if (begin != 0)
subtrahend = marks_rows_partial_sums[begin - 1]; subtrahend = marks_rows_partial_sums[begin - 1];
return marks_rows_partial_sums[end - 1] - subtrahend; return marks_rows_partial_sums[end - 1] - subtrahend;
} }
@ -126,6 +122,7 @@ uint64_t MergeTreeIndexGranularityAdaptive::getBytesSize() const
{ {
return marks_rows_partial_sums.size() * sizeof(size_t); return marks_rows_partial_sums.size() * sizeof(size_t);
} }
uint64_t MergeTreeIndexGranularityAdaptive::getBytesAllocated() const uint64_t MergeTreeIndexGranularityAdaptive::getBytesAllocated() const
{ {
return marks_rows_partial_sums.capacity() * sizeof(size_t); return marks_rows_partial_sums.capacity() * sizeof(size_t);

View File

@ -4,6 +4,10 @@
namespace DB namespace DB
{ {
/// Class that stores adaptive index granularity.
/// Inside it contains vector of partial sums of rows after mark:
/// |-----|---|----|----|
/// | 5 | 8 | 12 | 16 |
class MergeTreeIndexGranularityAdaptive : public MergeTreeIndexGranularity class MergeTreeIndexGranularityAdaptive : public MergeTreeIndexGranularity
{ {
public: public:
@ -19,7 +23,6 @@ public:
size_t getTotalRows() const override; size_t getTotalRows() const override;
size_t getMarkRows(size_t mark_index) const override; size_t getMarkRows(size_t mark_index) const override;
size_t getMarkStartingRow(size_t mark_index) const override;
bool hasFinalMark() const override; bool hasFinalMark() const override;
void appendMark(size_t rows_count) override; void appendMark(size_t rows_count) override;

View File

@ -1,6 +1,5 @@
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h> #include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
namespace DB namespace DB
{ {
@ -38,22 +37,6 @@ size_t MergeTreeIndexGranularityConstant::getMarkRows(size_t mark_index) const
return 0; // Final mark. return 0; // Final mark.
} }
size_t MergeTreeIndexGranularityConstant::getMarkStartingRow(size_t mark_index) const
{
if (mark_index > getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get non existing mark {}, while size is {}", mark_index, getMarksCount());
size_t total_rows = 0;
if (mark_index >= num_marks_without_final && mark_index != 0)
{
total_rows += last_mark_granularity;
mark_index = num_marks_without_final - 1;
}
total_rows += constant_granularity * mark_index;
return total_rows;
}
size_t MergeTreeIndexGranularityConstant::getMarksCount() const size_t MergeTreeIndexGranularityConstant::getMarksCount() const
{ {
return num_marks_without_final + has_final_mark; return num_marks_without_final + has_final_mark;
@ -104,8 +87,14 @@ void MergeTreeIndexGranularityConstant::adjustLastMark(size_t rows_count)
size_t MergeTreeIndexGranularityConstant::getRowsCountInRange(size_t begin, size_t end) const size_t MergeTreeIndexGranularityConstant::getRowsCountInRange(size_t begin, size_t end) const
{ {
if (end > getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get marks in range [{}; {}), while size is {}", begin, end, getMarksCount());
if (end == 0)
return 0;
size_t total_rows = 0; size_t total_rows = 0;
if (end >= num_marks_without_final && end != 0) if (end >= num_marks_without_final)
{ {
total_rows += last_mark_granularity; total_rows += last_mark_granularity;
end = num_marks_without_final - 1; end = num_marks_without_final - 1;
@ -115,27 +104,31 @@ size_t MergeTreeIndexGranularityConstant::getRowsCountInRange(size_t begin, size
return total_rows; return total_rows;
} }
size_t MergeTreeIndexGranularityConstant::getMarkUpperBoundForRow(size_t row_index) const
{
size_t num_rows_with_constant_granularity = (num_marks_without_final - 1) * constant_granularity;
/// All granules with constant granularity + last granule + final granule
if (row_index >= num_rows_with_constant_granularity)
return getMarksCount();
return (row_index + constant_granularity - 1) / constant_granularity;
}
size_t MergeTreeIndexGranularityConstant::countMarksForRows(size_t from_mark, size_t number_of_rows) const size_t MergeTreeIndexGranularityConstant::countMarksForRows(size_t from_mark, size_t number_of_rows) const
{ {
size_t rows_before_mark = getMarkStartingRow(from_mark); size_t rows_before_mark = getMarkStartingRow(from_mark);
size_t last_row_pos = rows_before_mark + number_of_rows; size_t last_row_pos = rows_before_mark + number_of_rows;
if (last_row_pos >= (num_marks_without_final - 1) * constant_granularity) return getMarkUpperBoundForRow(last_row_pos) - from_mark;
return num_marks_without_final - from_mark;
return (last_row_pos + constant_granularity - 1) / constant_granularity - from_mark;
} }
size_t MergeTreeIndexGranularityConstant::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const size_t MergeTreeIndexGranularityConstant::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const
{ {
UNUSED(from_mark, number_of_rows, offset_in_rows); size_t rows_before_mark = getMarkStartingRow(from_mark);
return 0; size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows;
// size_t rows_before_mark = getMarkStartingRow(from_mark);
// size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows;
// auto it = std::upper_bound(marks_rows_partial_sums.begin(), marks_rows_partial_sums.end(), last_row_pos);
// size_t to_mark = it - marks_rows_partial_sums.begin();
// return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows; return getRowsCountInRange(from_mark, std::max(1UL, getMarkUpperBoundForRow(last_row_pos))) - offset_in_rows;
} }
std::string MergeTreeIndexGranularityConstant::describe() const std::string MergeTreeIndexGranularityConstant::describe() const

View File

@ -4,6 +4,8 @@
namespace DB namespace DB
{ {
/// Class that stores constant index granularity for whole part, except
/// last non-zero granule and final granule which always has zero rows.
class MergeTreeIndexGranularityConstant : public MergeTreeIndexGranularity class MergeTreeIndexGranularityConstant : public MergeTreeIndexGranularity
{ {
private: private:
@ -13,6 +15,8 @@ private:
size_t num_marks_without_final = 0; size_t num_marks_without_final = 0;
bool has_final_mark = false; bool has_final_mark = false;
size_t getMarkUpperBoundForRow(size_t row_index) const;
public: public:
MergeTreeIndexGranularityConstant() = default; MergeTreeIndexGranularityConstant() = default;
explicit MergeTreeIndexGranularityConstant(size_t constant_granularity_); explicit MergeTreeIndexGranularityConstant(size_t constant_granularity_);
@ -27,7 +31,6 @@ public:
size_t getTotalRows() const override; size_t getTotalRows() const override;
size_t getMarkRows(size_t mark_index) const override; size_t getMarkRows(size_t mark_index) const override;
size_t getMarkStartingRow(size_t mark_index) const override;
bool hasFinalMark() const override { return has_final_mark; } bool hasFinalMark() const override { return has_final_mark; }
void appendMark(size_t rows_count) override; void appendMark(size_t rows_count) override;

View File

@ -4,7 +4,6 @@
#include <Columns/FilterDescription.h> #include <Columns/FilterDescription.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include "Common/Logger.h"
#include <Common/TargetSpecific.h> #include <Common/TargetSpecific.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <Core/UUID.h> #include <Core/UUID.h>

View File

@ -35,7 +35,6 @@
#include <DataTypes/DataTypeVariant.h> #include <DataTypes/DataTypeVariant.h>
#include <boost/algorithm/string/replace.hpp> #include <boost/algorithm/string/replace.hpp>
#include <Common/ProfileEventsScope.h> #include <Common/ProfileEventsScope.h>
#include "Storages/MergeTree/MergeTreeIndexGranularity.h"
#include <Core/ColumnsWithTypeAndName.h> #include <Core/ColumnsWithTypeAndName.h>
@ -75,6 +74,7 @@ namespace MergeTreeSetting
extern const MergeTreeSettingsFloat ratio_of_defaults_for_sparse_serialization; extern const MergeTreeSettingsFloat ratio_of_defaults_for_sparse_serialization;
extern const MergeTreeSettingsBool replace_long_file_name_to_hash; extern const MergeTreeSettingsBool replace_long_file_name_to_hash;
extern const MergeTreeSettingsBool ttl_only_drop_parts; extern const MergeTreeSettingsBool ttl_only_drop_parts;
extern const MergeTreeSettingsBool enable_index_granularity_compression;
} }
namespace ErrorCodes namespace ErrorCodes
@ -985,12 +985,19 @@ void finalizeMutatedPart(
new_data_part->rows_count = source_part->rows_count; new_data_part->rows_count = source_part->rows_count;
new_data_part->index_granularity = source_part->index_granularity; new_data_part->index_granularity = source_part->index_granularity;
/// Just in case
new_data_part->index_granularity->shrinkToFitInMemory();
new_data_part->setIndex(*source_part->getIndex()); new_data_part->setIndex(*source_part->getIndex());
new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->minmax_idx = source_part->minmax_idx;
new_data_part->modification_time = time(nullptr); new_data_part->modification_time = time(nullptr);
if ((*new_data_part->storage.getSettings())[MergeTreeSetting::enable_index_granularity_compression])
{
if (auto new_index_granularity = new_data_part->index_granularity->optimize())
new_data_part->index_granularity = std::move(new_index_granularity);
}
/// Just in case
new_data_part->index_granularity->shrinkToFitInMemory();
/// Load rest projections which are hardlinked /// Load rest projections which are hardlinked
bool noop; bool noop;
new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);

View File

@ -11,8 +11,8 @@ all_2_2_0 5 2 25
all_2_2_0 6 2 27 all_2_2_0 6 2 27
all_2_2_0 7 1 29 all_2_2_0 7 1 29
all_2_2_0 8 0 29 all_2_2_0 8 0 29
all_1_1_0 24 all_1_1_0 25
all_2_2_0 72 all_2_2_0 25
adaptive non-const, after merge adaptive non-const, after merge
all_1_2_1 0 10 0 all_1_2_1 0 10 0
all_1_2_1 1 5 10 all_1_2_1 1 5 10