ClickHouse/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
Azat Khuzhin 195b4c47ea Avoid processing per-column TTL multiple times
Before this patch ttl.txt was not written for per-column TTLs, and
hence they were recalculated every time after a server restart or a
DETACH/ATTACH cycle (note that it works without a restart, since the
in-memory representation avoids the recalculation).

v2: convert test to .sh to get correct current database over default for MV
v3: extract UUID to avoid error like in [1]:
    [ 490 ] {} <Error> void DB::SystemLog<DB::TextLogElement>::flushImpl(const std::vector<LogElement> &, uint64_t) []: Code: 349. DB::Exception: Cannot convert NULL value to non-Nullable type: While processing query_id LIKE concat('%', CAST(_CAST(NULL, 'Nullable(UUID)') AS uuid, 'String'), '%'): while pushing to view test_0hc2ro.this_text_log (c64e5af4-059e-4330-a728-354ecf83c031). (CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN)
    [1]: https://s3.amazonaws.com/clickhouse-test-reports/35820/a512d322b024d37d2f1082c4833f59f86057555f/stateless_tests_flaky_check__address__actions_.html
v4: add no-parallel to avoid issues with disappeared underlying table while pushing to text_log
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-04-02 15:19:39 +03:00

92 lines
2.6 KiB
C++

#pragma once
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFile.h>
#include <Storages/TTLDescription.h>
#include <map>
namespace DB
{
/// Lower and upper TTL bounds tracked for a single column or for the table.
/// A value of zero in `min` is treated as "not set yet".
struct MergeTreeDataPartTTLInfo
{
    time_t min = 0;
    time_t max = 0;

    /// Set when this TTL was computed on a completely expired part. Such parts
    /// should not be selected for TTL again, although it still makes sense to
    /// recalculate the TTL when merging several parts together.
    bool finished = false;

    /// Folds a single timestamp into the tracked bounds.
    /// A zero timestamp never replaces `min`; `max` only ever grows.
    void update(time_t time)
    {
        const bool lowers_min = time != 0 && (min == 0 || time < min);
        if (lowers_min)
            min = time;

        if (max < time)
            max = time;
    }

    /// Folds the bounds of another info into this one.
    /// `finished` stays true only if it is already true here and in `other_info`.
    /// NOTE(review): as `finished` defaults to false, merging into a
    /// default-constructed info always leaves it false - confirm this is intended.
    void update(const MergeTreeDataPartTTLInfo & other_info)
    {
        const time_t candidate_min = other_info.min;
        const bool lowers_min = candidate_min != 0 && (min == 0 || candidate_min < min);
        if (lowers_min)
            min = candidate_min;

        if (max < other_info.max)
            max = other_info.max;

        finished = finished && other_info.finished;
    }
};
/// Maps a String key to its TTL info.
/// Order is important as it would be serialized and hashed for checksums,
/// which is why an ordered std::map is used.
/// NOTE(review): keys are presumably column names or TTL expression identifiers - confirm against callers.
using TTLInfoMap = std::map<String, MergeTreeDataPartTTLInfo>;
/// TTL infos of a part for all columns and for the table, together with the
/// minimal and maximal TTL of the whole part.
struct MergeTreeDataPartTTLInfos
{
    TTLInfoMap columns_ttl;
    MergeTreeDataPartTTLInfo table_ttl;

    /// `part_min_ttl` and `part_max_ttl` are the TTLs used when selecting parts
    /// to merge in order to remove expired rows. Zero is treated as "not set".
    time_t part_min_ttl = 0;
    time_t part_max_ttl = 0;

    TTLInfoMap rows_where_ttl;
    TTLInfoMap moves_ttl;
    TTLInfoMap recompression_ttl;
    TTLInfoMap group_by_ttl;

    /// Return the smallest max recompression TTL value.
    time_t getMinimalMaxRecompressionTTL() const;

    /// Serialization entry points; defined outside of this header.
    void read(ReadBuffer & in);
    void write(WriteBuffer & out) const;

    /// Combines `other_infos` into this object; defined outside of this header.
    void update(const MergeTreeDataPartTTLInfos & other_infos);

    /// Has any TTLs which are not calculated on completely expired parts.
    bool hasAnyNonFinishedTTLs() const;

    /// Widens the part-level [min, max] TTL range with the given bounds.
    /// Zero arguments are ignored; a zero stored bound is always overwritten
    /// by a non-zero argument.
    void updatePartMinMaxTTL(time_t time_min, time_t time_max)
    {
        const bool lowers_min = time_min != 0 && (part_min_ttl == 0 || time_min < part_min_ttl);
        if (lowers_min)
            part_min_ttl = time_min;

        const bool raises_max = time_max != 0 && (part_max_ttl == 0 || part_max_ttl < time_max);
        if (raises_max)
            part_max_ttl = time_max;
    }

    /// True when no part-level, move, recompression or per-column TTL is present.
    bool empty() const
    {
        /// part_min_ttl is the minimum of rows, rows_where and group_by TTLs,
        /// so a non-zero value already means there is something to report.
        if (part_min_ttl)
            return false;

        return moves_ttl.empty() && recompression_ttl.empty() && columns_ttl.empty();
    }
};
/// Selects the most appropriate TTLDescription using TTL info and current time.
/// Only the declaration is visible in this header; the selection rule lives in the implementation.
/// NOTE(review): presumably `use_max` chooses whether each entry's `max` (rather than `min`) bound
/// is compared against `current_time`, and an empty optional means no description applies - confirm.
std::optional<TTLDescription> selectTTLDescriptionForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max);
}