mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-14 02:12:21 +00:00
Do not load useless columns from the index in memory
This commit is contained in:
parent
64a80f1011
commit
e98d09c93e
2
contrib/rapidjson
vendored
2
contrib/rapidjson
vendored
@ -1 +1 @@
|
||||
Subproject commit c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa
|
||||
Subproject commit a9bc56c9165f1dbbbcada64221bd3a59042c5b95
|
@ -32,9 +32,9 @@ std::string toString(const Values & value)
|
||||
|
||||
int compareValues(const Values & lhs, const Values & rhs)
|
||||
{
|
||||
chassert(lhs.size() == rhs.size());
|
||||
size_t size = std::min(lhs.size(), rhs.size());
|
||||
|
||||
for (size_t i = 0; i < lhs.size(); ++i)
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (applyVisitor(FieldVisitorAccurateLess(), lhs[i], rhs[i]))
|
||||
return -1;
|
||||
@ -55,8 +55,9 @@ public:
|
||||
Values getValue(size_t part_idx, size_t mark) const
|
||||
{
|
||||
const auto & index = parts[part_idx].data_part->getIndex();
|
||||
Values values(index.size());
|
||||
for (size_t i = 0; i < values.size(); ++i)
|
||||
size_t size = index.size();
|
||||
Values values(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
index[i]->get(mark, values[i]);
|
||||
if (values[i].isNull())
|
||||
|
@ -869,6 +869,27 @@ void IMergeTreeDataPart::loadIndex() const
|
||||
for (size_t j = 0; j < key_size; ++j)
|
||||
key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {});
|
||||
|
||||
/// Cut useless suffix columns, if necessary.
|
||||
Float64 ratio_to_drop_suffix_columns = storage.getSettings()->primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns;
|
||||
if (key_size > 1 && ratio_to_drop_suffix_columns > 0 && ratio_to_drop_suffix_columns < 1)
|
||||
{
|
||||
chassert(marks_count > 0);
|
||||
for (size_t j = 0; j < key_size - 1; ++j)
|
||||
{
|
||||
size_t num_changes = 0;
|
||||
for (size_t i = 1; i < marks_count; ++i)
|
||||
if (0 != loaded_index[j]->compareAt(i, i - 1, *loaded_index[j], 0))
|
||||
++num_changes;
|
||||
|
||||
if (static_cast<Float64>(num_changes) / marks_count >= ratio_to_drop_suffix_columns)
|
||||
{
|
||||
key_size = j + 1;
|
||||
loaded_index.resize(key_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < key_size; ++i)
|
||||
{
|
||||
loaded_index[i]->shrinkToFit();
|
||||
|
@ -1110,7 +1110,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
|
||||
DataTypes key_types;
|
||||
for (size_t i : key_indices)
|
||||
{
|
||||
index_columns->emplace_back(ColumnWithTypeAndName{index[i], primary_key.data_types[i], primary_key.column_names[i]});
|
||||
if (i < index.size())
|
||||
index_columns->emplace_back(index[i], primary_key.data_types[i], primary_key.column_names[i]);
|
||||
else
|
||||
index_columns->emplace_back(); /// The column of the primary key was not loaded in memory - we'll skip it.
|
||||
|
||||
key_types.emplace_back(primary_key.data_types[i]);
|
||||
}
|
||||
|
||||
@ -1119,7 +1123,6 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
|
||||
std::function<void(size_t, size_t, FieldRef &)> create_field_ref;
|
||||
if (key_condition.hasMonotonicFunctionsChain())
|
||||
{
|
||||
|
||||
create_field_ref = [index_columns](size_t row, size_t column, FieldRef & field)
|
||||
{
|
||||
field = {index_columns.get(), row, column};
|
||||
@ -1159,7 +1162,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
|
||||
{
|
||||
for (size_t i = 0; i < used_key_size; ++i)
|
||||
{
|
||||
create_field_ref(range.begin, i, index_left[i]);
|
||||
if ((*index_columns)[i].column)
|
||||
create_field_ref(range.begin, i, index_left[i]);
|
||||
else
|
||||
index_left[i] = NEGATIVE_INFINITY;
|
||||
|
||||
index_right[i] = POSITIVE_INFINITY;
|
||||
}
|
||||
}
|
||||
@ -1170,8 +1177,17 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
|
||||
|
||||
for (size_t i = 0; i < used_key_size; ++i)
|
||||
{
|
||||
create_field_ref(range.begin, i, index_left[i]);
|
||||
create_field_ref(range.end, i, index_right[i]);
|
||||
if ((*index_columns)[i].column)
|
||||
{
|
||||
create_field_ref(range.begin, i, index_left[i]);
|
||||
create_field_ref(range.end, i, index_right[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// If the PK column was not loaded in memory - exclude it from the analysis.
|
||||
index_left[i] = NEGATIVE_INFINITY;
|
||||
index_right[i] = POSITIVE_INFINITY;
|
||||
}
|
||||
}
|
||||
}
|
||||
key_condition_maybe_true = key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types);
|
||||
|
@ -202,7 +202,7 @@ struct Settings;
|
||||
M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \
|
||||
M(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \
|
||||
M(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. This can save memory in the presence of a large number of tables.", 0) \
|
||||
\
|
||||
M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \
|
||||
/** Projection settings. */ \
|
||||
M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user