mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #14438 from ClickHouse/apply_ttl_if_not_calculated
Apply TTL if it's not calculated for part
This commit is contained in:
commit
f8aa6cfe8a
@ -1042,6 +1042,37 @@ void IMergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) co
|
||||
column_to_size[column_name] = size.data_compressed;
|
||||
}
|
||||
|
||||
|
||||
bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metadata_snapshot) const
|
||||
{
|
||||
if (!metadata_snapshot->hasAnyTTL())
|
||||
return false;
|
||||
|
||||
if (metadata_snapshot->hasRowsTTL())
|
||||
{
|
||||
if (isEmpty()) /// All rows were finally deleted and we don't store TTL
|
||||
return true;
|
||||
else if (ttl_infos.table_ttl.min == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto & [column, desc] : metadata_snapshot->getColumnTTLs())
|
||||
{
|
||||
/// Part has this column, but we don't calculated TTL for it
|
||||
if (!ttl_infos.columns_ttl.count(column) && getColumns().contains(column))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto & move_desc : metadata_snapshot->getMoveTTLs())
|
||||
{
|
||||
/// Move TTL is not calculated
|
||||
if (!ttl_infos.moves_ttl.count(move_desc.result_column))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
|
||||
{
|
||||
return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);
|
||||
|
@ -344,6 +344,11 @@ public:
|
||||
|
||||
static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME = "delete-on-destroy.txt";
|
||||
|
||||
/// Checks that all TTLs (table min/max, column ttls, so on) for part
|
||||
/// calculated. Part without calculated TTL may exist if TTL was added after
|
||||
/// part creation (using alter query with materialize_ttl setting).
|
||||
bool checkAllTTLCalculated(const StorageMetadataPtr & metadata_snapshot) const;
|
||||
|
||||
protected:
|
||||
|
||||
/// Total size of all columns, calculated once in calcuateColumnSizesOnDisk
|
||||
|
@ -651,8 +651,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
new_data_part->is_temp = true;
|
||||
|
||||
bool need_remove_expired_values = false;
|
||||
bool force_ttl = false;
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
new_data_part->ttl_infos.update(part->ttl_infos);
|
||||
if (metadata_snapshot->hasAnyTTL() && !part->checkAllTTLCalculated(metadata_snapshot))
|
||||
{
|
||||
LOG_INFO(log, "Some TTL values were not calculated for part {}. Will calculate them forcefully during merge.", part->name);
|
||||
need_remove_expired_values = true;
|
||||
force_ttl = true;
|
||||
}
|
||||
}
|
||||
|
||||
const auto & part_min_ttl = new_data_part->ttl_infos.part_min_ttl;
|
||||
if (part_min_ttl && part_min_ttl <= time_of_merge)
|
||||
@ -825,7 +834,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, Names());
|
||||
|
||||
if (need_remove_expired_values)
|
||||
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, false);
|
||||
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, force_ttl);
|
||||
|
||||
|
||||
if (metadata_snapshot->hasSecondaryIndices())
|
||||
|
@ -38,7 +38,7 @@ struct MergeTreeDataPartTTLInfos
|
||||
MergeTreeDataPartTTLInfo table_ttl;
|
||||
|
||||
/// `part_min_ttl` and `part_max_ttl` are TTLs which are used for selecting parts
|
||||
/// to merge in order to remove expired rows.
|
||||
/// to merge in order to remove expired rows.
|
||||
time_t part_min_ttl = 0;
|
||||
time_t part_max_ttl = 0;
|
||||
|
||||
@ -58,7 +58,7 @@ struct MergeTreeDataPartTTLInfos
|
||||
part_max_ttl = time_max;
|
||||
}
|
||||
|
||||
bool empty()
|
||||
bool empty() const
|
||||
{
|
||||
return !part_min_ttl && moves_ttl.empty();
|
||||
}
|
||||
|
@ -0,0 +1,5 @@
|
||||
3000
|
||||
3000
|
||||
2000
|
||||
2000
|
||||
1001
|
@ -0,0 +1,37 @@
|
||||
DROP TABLE IF EXISTS table_for_ttl;
|
||||
|
||||
CREATE TABLE table_for_ttl(
|
||||
d DateTime,
|
||||
key UInt64,
|
||||
value String)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY tuple()
|
||||
PARTITION BY key;
|
||||
|
||||
INSERT INTO table_for_ttl SELECT now() - INTERVAL 2 YEAR, 1, toString(number) from numbers(1000);
|
||||
|
||||
INSERT INTO table_for_ttl SELECT now() - INTERVAL 2 DAY, 3, toString(number) from numbers(2000, 1000);
|
||||
|
||||
INSERT INTO table_for_ttl SELECT now(), 4, toString(number) from numbers(3000, 1000);
|
||||
|
||||
SELECT count() FROM table_for_ttl;
|
||||
|
||||
ALTER TABLE table_for_ttl MODIFY TTL d + INTERVAL 1 YEAR SETTINGS materialize_ttl_after_modify = 0;
|
||||
|
||||
SELECT count() FROM table_for_ttl;
|
||||
|
||||
OPTIMIZE TABLE table_for_ttl FINAL;
|
||||
|
||||
SELECT count() FROM table_for_ttl;
|
||||
|
||||
ALTER TABLE table_for_ttl MODIFY COLUMN value String TTL d + INTERVAL 1 DAY SETTINGS materialize_ttl_after_modify = 0;
|
||||
|
||||
SELECT count(distinct value) FROM table_for_ttl;
|
||||
|
||||
OPTIMIZE TABLE table_for_ttl FINAL;
|
||||
|
||||
SELECT count(distinct value) FROM table_for_ttl;
|
||||
|
||||
OPTIMIZE TABLE table_for_ttl FINAL; -- Just check in logs, that it doesn't run with force again
|
||||
|
||||
DROP TABLE IF EXISTS table_for_ttl;
|
Loading…
Reference in New Issue
Block a user