mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
fix index granularity
This commit is contained in:
parent
c5817d528c
commit
3bba64e975
@ -41,6 +41,11 @@ size_t MergeTreeIndexGranularity::getMarksCountWithoutFinal() const
|
|||||||
return total - hasFinalMark();
|
return total - hasFinalMark();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t MergeTreeIndexGranularity::getMarkStartingRow(size_t mark_index) const
|
||||||
|
{
|
||||||
|
return getRowsCountInRange(0, mark_index);
|
||||||
|
}
|
||||||
|
|
||||||
size_t MergeTreeIndexGranularity::getLastMarkRows() const
|
size_t MergeTreeIndexGranularity::getLastMarkRows() const
|
||||||
{
|
{
|
||||||
return getMarkRows(getMarksCount() - 1);
|
return getMarkRows(getMarksCount() - 1);
|
||||||
@ -48,7 +53,7 @@ size_t MergeTreeIndexGranularity::getLastMarkRows() const
|
|||||||
|
|
||||||
size_t MergeTreeIndexGranularity::getLastNonFinalMarkRows() const
|
size_t MergeTreeIndexGranularity::getLastNonFinalMarkRows() const
|
||||||
{
|
{
|
||||||
size_t last_mark_rows = getLastMarkRows();
|
size_t last_mark_rows = getMarkRows(getMarksCount() - 1);
|
||||||
if (last_mark_rows != 0)
|
if (last_mark_rows != 0)
|
||||||
return last_mark_rows;
|
return last_mark_rows;
|
||||||
return getMarkRows(getMarksCount() - 2);
|
return getMarkRows(getMarksCount() - 2);
|
||||||
@ -140,5 +145,4 @@ MergeTreeIndexGranularityPtr createMergeTreeIndexGranularity(
|
|||||||
return std::make_shared<MergeTreeIndexGranularityConstant>(computed_granularity);
|
return std::make_shared<MergeTreeIndexGranularityConstant>(computed_granularity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -5,12 +5,7 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
/// Class contains information about index granularity in rows of IMergeTreeDataPart
|
/// Class that contains information about index granularity in rows of IMergeTreeDataPart
|
||||||
/// Inside it contains vector of partial sums of rows after mark:
|
|
||||||
/// |-----|---|----|----|
|
|
||||||
/// | 5 | 8 | 12 | 16 |
|
|
||||||
/// If user doesn't specify setting index_granularity_bytes for MergeTree* table
|
|
||||||
/// all values in inner vector would have constant stride (default 8192).
|
|
||||||
class MergeTreeIndexGranularity
|
class MergeTreeIndexGranularity
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -49,7 +44,7 @@ public:
|
|||||||
virtual size_t getMarkRows(size_t mark_index) const = 0;
|
virtual size_t getMarkRows(size_t mark_index) const = 0;
|
||||||
|
|
||||||
/// Return amount of rows before mark
|
/// Return amount of rows before mark
|
||||||
virtual size_t getMarkStartingRow(size_t mark_index) const = 0;
|
size_t getMarkStartingRow(size_t mark_index) const;
|
||||||
|
|
||||||
/// Amount of rows after last mark
|
/// Amount of rows after last mark
|
||||||
size_t getLastMarkRows() const;
|
size_t getLastMarkRows() const;
|
||||||
|
@ -27,22 +27,11 @@ size_t MergeTreeIndexGranularityAdaptive::getMarkRows(size_t mark_index) const
|
|||||||
return marks_rows_partial_sums[mark_index] - marks_rows_partial_sums[mark_index - 1];
|
return marks_rows_partial_sums[mark_index] - marks_rows_partial_sums[mark_index - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t MergeTreeIndexGranularityAdaptive::getMarkStartingRow(size_t mark_index) const
|
|
||||||
{
|
|
||||||
if (mark_index > getMarksCount())
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get non existing mark {}, while size is {}", mark_index, getMarksCount());
|
|
||||||
|
|
||||||
if (mark_index == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return marks_rows_partial_sums[mark_index - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MergeTreeIndexGranularityAdaptive::hasFinalMark() const
|
bool MergeTreeIndexGranularityAdaptive::hasFinalMark() const
|
||||||
{
|
{
|
||||||
if (marks_rows_partial_sums.empty())
|
if (marks_rows_partial_sums.empty())
|
||||||
return false;
|
return false;
|
||||||
return marks_rows_partial_sums.back() == 0;
|
return getLastMarkRows() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t MergeTreeIndexGranularityAdaptive::getMarksCount() const
|
size_t MergeTreeIndexGranularityAdaptive::getMarksCount() const
|
||||||
@ -92,9 +81,16 @@ void MergeTreeIndexGranularityAdaptive::adjustLastMark(size_t rows_count)
|
|||||||
|
|
||||||
size_t MergeTreeIndexGranularityAdaptive::getRowsCountInRange(size_t begin, size_t end) const
|
size_t MergeTreeIndexGranularityAdaptive::getRowsCountInRange(size_t begin, size_t end) const
|
||||||
{
|
{
|
||||||
|
if (end > getMarksCount())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get marks in range [{}; {}), while size is {}", begin, end, getMarksCount());
|
||||||
|
|
||||||
|
if (end == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
size_t subtrahend = 0;
|
size_t subtrahend = 0;
|
||||||
if (begin != 0)
|
if (begin != 0)
|
||||||
subtrahend = marks_rows_partial_sums[begin - 1];
|
subtrahend = marks_rows_partial_sums[begin - 1];
|
||||||
|
|
||||||
return marks_rows_partial_sums[end - 1] - subtrahend;
|
return marks_rows_partial_sums[end - 1] - subtrahend;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -126,6 +122,7 @@ uint64_t MergeTreeIndexGranularityAdaptive::getBytesSize() const
|
|||||||
{
|
{
|
||||||
return marks_rows_partial_sums.size() * sizeof(size_t);
|
return marks_rows_partial_sums.size() * sizeof(size_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t MergeTreeIndexGranularityAdaptive::getBytesAllocated() const
|
uint64_t MergeTreeIndexGranularityAdaptive::getBytesAllocated() const
|
||||||
{
|
{
|
||||||
return marks_rows_partial_sums.capacity() * sizeof(size_t);
|
return marks_rows_partial_sums.capacity() * sizeof(size_t);
|
||||||
|
@ -4,6 +4,10 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/// Class that stores adaptive index granularity.
|
||||||
|
/// Inside it contains vector of partial sums of rows after mark:
|
||||||
|
/// |-----|---|----|----|
|
||||||
|
/// | 5 | 8 | 12 | 16 |
|
||||||
class MergeTreeIndexGranularityAdaptive : public MergeTreeIndexGranularity
|
class MergeTreeIndexGranularityAdaptive : public MergeTreeIndexGranularity
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -19,7 +23,6 @@ public:
|
|||||||
size_t getTotalRows() const override;
|
size_t getTotalRows() const override;
|
||||||
|
|
||||||
size_t getMarkRows(size_t mark_index) const override;
|
size_t getMarkRows(size_t mark_index) const override;
|
||||||
size_t getMarkStartingRow(size_t mark_index) const override;
|
|
||||||
bool hasFinalMark() const override;
|
bool hasFinalMark() const override;
|
||||||
|
|
||||||
void appendMark(size_t rows_count) override;
|
void appendMark(size_t rows_count) override;
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -38,22 +37,6 @@ size_t MergeTreeIndexGranularityConstant::getMarkRows(size_t mark_index) const
|
|||||||
return 0; // Final mark.
|
return 0; // Final mark.
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t MergeTreeIndexGranularityConstant::getMarkStartingRow(size_t mark_index) const
|
|
||||||
{
|
|
||||||
if (mark_index > getMarksCount())
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get non existing mark {}, while size is {}", mark_index, getMarksCount());
|
|
||||||
|
|
||||||
size_t total_rows = 0;
|
|
||||||
if (mark_index >= num_marks_without_final && mark_index != 0)
|
|
||||||
{
|
|
||||||
total_rows += last_mark_granularity;
|
|
||||||
mark_index = num_marks_without_final - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
total_rows += constant_granularity * mark_index;
|
|
||||||
return total_rows;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t MergeTreeIndexGranularityConstant::getMarksCount() const
|
size_t MergeTreeIndexGranularityConstant::getMarksCount() const
|
||||||
{
|
{
|
||||||
return num_marks_without_final + has_final_mark;
|
return num_marks_without_final + has_final_mark;
|
||||||
@ -104,8 +87,14 @@ void MergeTreeIndexGranularityConstant::adjustLastMark(size_t rows_count)
|
|||||||
|
|
||||||
size_t MergeTreeIndexGranularityConstant::getRowsCountInRange(size_t begin, size_t end) const
|
size_t MergeTreeIndexGranularityConstant::getRowsCountInRange(size_t begin, size_t end) const
|
||||||
{
|
{
|
||||||
|
if (end > getMarksCount())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get marks in range [{}; {}), while size is {}", begin, end, getMarksCount());
|
||||||
|
|
||||||
|
if (end == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
size_t total_rows = 0;
|
size_t total_rows = 0;
|
||||||
if (end >= num_marks_without_final && end != 0)
|
if (end >= num_marks_without_final)
|
||||||
{
|
{
|
||||||
total_rows += last_mark_granularity;
|
total_rows += last_mark_granularity;
|
||||||
end = num_marks_without_final - 1;
|
end = num_marks_without_final - 1;
|
||||||
@ -115,27 +104,31 @@ size_t MergeTreeIndexGranularityConstant::getRowsCountInRange(size_t begin, size
|
|||||||
return total_rows;
|
return total_rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t MergeTreeIndexGranularityConstant::getMarkUpperBoundForRow(size_t row_index) const
|
||||||
|
{
|
||||||
|
size_t num_rows_with_constant_granularity = (num_marks_without_final - 1) * constant_granularity;
|
||||||
|
|
||||||
|
/// All granules with constant granularity + last granule + final granule
|
||||||
|
if (row_index >= num_rows_with_constant_granularity)
|
||||||
|
return getMarksCount();
|
||||||
|
|
||||||
|
return (row_index + constant_granularity - 1) / constant_granularity;
|
||||||
|
}
|
||||||
|
|
||||||
size_t MergeTreeIndexGranularityConstant::countMarksForRows(size_t from_mark, size_t number_of_rows) const
|
size_t MergeTreeIndexGranularityConstant::countMarksForRows(size_t from_mark, size_t number_of_rows) const
|
||||||
{
|
{
|
||||||
size_t rows_before_mark = getMarkStartingRow(from_mark);
|
size_t rows_before_mark = getMarkStartingRow(from_mark);
|
||||||
size_t last_row_pos = rows_before_mark + number_of_rows;
|
size_t last_row_pos = rows_before_mark + number_of_rows;
|
||||||
|
|
||||||
if (last_row_pos >= (num_marks_without_final - 1) * constant_granularity)
|
return getMarkUpperBoundForRow(last_row_pos) - from_mark;
|
||||||
return num_marks_without_final - from_mark;
|
|
||||||
|
|
||||||
return (last_row_pos + constant_granularity - 1) / constant_granularity - from_mark;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t MergeTreeIndexGranularityConstant::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const
|
size_t MergeTreeIndexGranularityConstant::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const
|
||||||
{
|
{
|
||||||
UNUSED(from_mark, number_of_rows, offset_in_rows);
|
size_t rows_before_mark = getMarkStartingRow(from_mark);
|
||||||
return 0;
|
size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows;
|
||||||
// size_t rows_before_mark = getMarkStartingRow(from_mark);
|
|
||||||
// size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows;
|
|
||||||
// auto it = std::upper_bound(marks_rows_partial_sums.begin(), marks_rows_partial_sums.end(), last_row_pos);
|
|
||||||
// size_t to_mark = it - marks_rows_partial_sums.begin();
|
|
||||||
|
|
||||||
// return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows;
|
return getRowsCountInRange(from_mark, std::max(1UL, getMarkUpperBoundForRow(last_row_pos))) - offset_in_rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string MergeTreeIndexGranularityConstant::describe() const
|
std::string MergeTreeIndexGranularityConstant::describe() const
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/// Class that stores constant index granularity for whole part, except
|
||||||
|
/// last non-zero granule and final granule which always has zero rows.
|
||||||
class MergeTreeIndexGranularityConstant : public MergeTreeIndexGranularity
|
class MergeTreeIndexGranularityConstant : public MergeTreeIndexGranularity
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
@ -13,6 +15,8 @@ private:
|
|||||||
size_t num_marks_without_final = 0;
|
size_t num_marks_without_final = 0;
|
||||||
bool has_final_mark = false;
|
bool has_final_mark = false;
|
||||||
|
|
||||||
|
size_t getMarkUpperBoundForRow(size_t row_index) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MergeTreeIndexGranularityConstant() = default;
|
MergeTreeIndexGranularityConstant() = default;
|
||||||
explicit MergeTreeIndexGranularityConstant(size_t constant_granularity_);
|
explicit MergeTreeIndexGranularityConstant(size_t constant_granularity_);
|
||||||
@ -27,7 +31,6 @@ public:
|
|||||||
size_t getTotalRows() const override;
|
size_t getTotalRows() const override;
|
||||||
|
|
||||||
size_t getMarkRows(size_t mark_index) const override;
|
size_t getMarkRows(size_t mark_index) const override;
|
||||||
size_t getMarkStartingRow(size_t mark_index) const override;
|
|
||||||
bool hasFinalMark() const override { return has_final_mark; }
|
bool hasFinalMark() const override { return has_final_mark; }
|
||||||
|
|
||||||
void appendMark(size_t rows_count) override;
|
void appendMark(size_t rows_count) override;
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
#include <Columns/FilterDescription.h>
|
#include <Columns/FilterDescription.h>
|
||||||
#include <Columns/ColumnConst.h>
|
#include <Columns/ColumnConst.h>
|
||||||
#include <Columns/ColumnsCommon.h>
|
#include <Columns/ColumnsCommon.h>
|
||||||
#include "Common/Logger.h"
|
|
||||||
#include <Common/TargetSpecific.h>
|
#include <Common/TargetSpecific.h>
|
||||||
#include <Common/logger_useful.h>
|
#include <Common/logger_useful.h>
|
||||||
#include <Core/UUID.h>
|
#include <Core/UUID.h>
|
||||||
|
@ -35,7 +35,6 @@
|
|||||||
#include <DataTypes/DataTypeVariant.h>
|
#include <DataTypes/DataTypeVariant.h>
|
||||||
#include <boost/algorithm/string/replace.hpp>
|
#include <boost/algorithm/string/replace.hpp>
|
||||||
#include <Common/ProfileEventsScope.h>
|
#include <Common/ProfileEventsScope.h>
|
||||||
#include "Storages/MergeTree/MergeTreeIndexGranularity.h"
|
|
||||||
#include <Core/ColumnsWithTypeAndName.h>
|
#include <Core/ColumnsWithTypeAndName.h>
|
||||||
|
|
||||||
|
|
||||||
@ -75,6 +74,7 @@ namespace MergeTreeSetting
|
|||||||
extern const MergeTreeSettingsFloat ratio_of_defaults_for_sparse_serialization;
|
extern const MergeTreeSettingsFloat ratio_of_defaults_for_sparse_serialization;
|
||||||
extern const MergeTreeSettingsBool replace_long_file_name_to_hash;
|
extern const MergeTreeSettingsBool replace_long_file_name_to_hash;
|
||||||
extern const MergeTreeSettingsBool ttl_only_drop_parts;
|
extern const MergeTreeSettingsBool ttl_only_drop_parts;
|
||||||
|
extern const MergeTreeSettingsBool enable_index_granularity_compression;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
@ -985,12 +985,19 @@ void finalizeMutatedPart(
|
|||||||
|
|
||||||
new_data_part->rows_count = source_part->rows_count;
|
new_data_part->rows_count = source_part->rows_count;
|
||||||
new_data_part->index_granularity = source_part->index_granularity;
|
new_data_part->index_granularity = source_part->index_granularity;
|
||||||
/// Just in case
|
|
||||||
new_data_part->index_granularity->shrinkToFitInMemory();
|
|
||||||
new_data_part->setIndex(*source_part->getIndex());
|
new_data_part->setIndex(*source_part->getIndex());
|
||||||
new_data_part->minmax_idx = source_part->minmax_idx;
|
new_data_part->minmax_idx = source_part->minmax_idx;
|
||||||
new_data_part->modification_time = time(nullptr);
|
new_data_part->modification_time = time(nullptr);
|
||||||
|
|
||||||
|
if ((*new_data_part->storage.getSettings())[MergeTreeSetting::enable_index_granularity_compression])
|
||||||
|
{
|
||||||
|
if (auto new_index_granularity = new_data_part->index_granularity->optimize())
|
||||||
|
new_data_part->index_granularity = std::move(new_index_granularity);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Just in case
|
||||||
|
new_data_part->index_granularity->shrinkToFitInMemory();
|
||||||
|
|
||||||
/// Load rest projections which are hardlinked
|
/// Load rest projections which are hardlinked
|
||||||
bool noop;
|
bool noop;
|
||||||
new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
|
new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
|
||||||
|
@ -11,8 +11,8 @@ all_2_2_0 5 2 25
|
|||||||
all_2_2_0 6 2 27
|
all_2_2_0 6 2 27
|
||||||
all_2_2_0 7 1 29
|
all_2_2_0 7 1 29
|
||||||
all_2_2_0 8 0 29
|
all_2_2_0 8 0 29
|
||||||
all_1_1_0 24
|
all_1_1_0 25
|
||||||
all_2_2_0 72
|
all_2_2_0 25
|
||||||
adaptive non-const, after merge
|
adaptive non-const, after merge
|
||||||
all_1_2_1 0 10 0
|
all_1_2_1 0 10 0
|
||||||
all_1_2_1 1 5 10
|
all_1_2_1 1 5 10
|
||||||
|
Loading…
Reference in New Issue
Block a user