mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 10:02:01 +00:00
Do not write expired columns by TTL after merge w/o TTL
Usually the second merge does not perform TTL, since everything is up to date. However, in this case TTLTransform is not used, hence expired_columns is not filled for the new part, and so those columns are written again with default values. Avoid this by manually filling expired_columns. Here is a simple reproducer: ```sql create table ttl_02262 (date Date, key Int, value String TTL date + interval 1 month) engine=MergeTree order by key settings min_bytes_for_wide_part=0, min_rows_for_wide_part=0; insert into ttl_02262 values ('2010-01-01', 2010, 'foo'); ``` ```sh # ls -l .server/data/default/ttl_02262/all_* .server/data/default/ttl_02262/all_1_1_0: total 48 -rw-r----- 1 root root 335 May 26 14:19 checksums.txt -rw-r----- 1 root root 76 May 26 14:19 columns.txt -rw-r----- 1 root root 1 May 26 14:19 count.txt -rw-r----- 1 root root 28 May 26 14:19 date.bin -rw-r----- 1 root root 48 May 26 14:19 date.mrk2 -rw-r----- 1 root root 10 May 26 14:19 default_compression_codec.txt -rw-r----- 1 root root 30 May 26 14:19 key.bin -rw-r----- 1 root root 48 May 26 14:19 key.mrk2 -rw-r----- 1 root root 8 May 26 14:19 primary.idx -rw-r----- 1 root root 99 May 26 14:19 ttl.txt -rw-r----- 1 root root 30 May 26 14:19 value.bin -rw-r----- 1 root root 48 May 26 14:19 value.mrk2 ``` ```sql optimize table ttl_02262 final; ``` ```sh .server/data/default/ttl_02262/all_1_1_1: total 40 -rw-r----- 1 root root 279 May 26 14:19 checksums.txt -rw-r----- 1 root root 61 May 26 14:19 columns.txt -rw-r----- 1 root root 1 May 26 14:19 count.txt -rw-r----- 1 root root 28 May 26 14:19 date.bin -rw-r----- 1 root root 48 May 26 14:19 date.mrk2 -rw-r----- 1 root root 10 May 26 14:19 default_compression_codec.txt -rw-r----- 1 root root 30 May 26 14:19 key.bin -rw-r----- 1 root root 48 May 26 14:19 key.mrk2 -rw-r----- 1 root root 8 May 26 14:19 primary.idx -rw-r----- 1 root root 81 May 26 14:19 ttl.txt ``` ```sql optimize table ttl_02262 final; ``` ```sh 
.server/data/default/ttl_02262/all_1_1_2: total 48 -rw-r----- 1 root root 349 May 26 14:20 checksums.txt -rw-r----- 1 root root 76 May 26 14:20 columns.txt -rw-r----- 1 root root 1 May 26 14:20 count.txt -rw-r----- 1 root root 28 May 26 14:20 date.bin -rw-r----- 1 root root 48 May 26 14:20 date.mrk2 -rw-r----- 1 root root 10 May 26 14:20 default_compression_codec.txt -rw-r----- 1 root root 30 May 26 14:20 key.bin -rw-r----- 1 root root 48 May 26 14:20 key.mrk2 -rw-r----- 1 root root 8 May 26 14:20 primary.idx -rw-r----- 1 root root 81 May 26 14:20 ttl.txt -rw-r----- 1 root root 27 May 26 14:20 value.bin -rw-r----- 1 root root 48 May 26 14:20 value.mrk2 ``` And now we have `value.*` for all_1_1_2, this should not happen. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
parent
8328d7068b
commit
4288d09a85
@ -200,6 +200,20 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
ctx->need_remove_expired_values = false;
|
||||
}
|
||||
|
||||
/// Skip fully expired columns manually: when need_remove_expired_values is not set,
|
||||
/// TTLTransform will not be used, and columns that had been removed by TTL will be added again with default values.
|
||||
if (!ctx->need_remove_expired_values)
|
||||
{
|
||||
for (auto & [column_name, ttl] : global_ctx->new_data_part->ttl_infos.columns_ttl)
|
||||
{
|
||||
if (ttl.finished())
|
||||
{
|
||||
global_ctx->new_data_part->expired_columns.insert(column_name);
|
||||
LOG_TRACE(ctx->log, "Adding expired column {} for {}", column_name, global_ctx->new_data_part->name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx->sum_input_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count;
|
||||
ctx->sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed;
|
||||
global_ctx->chosen_merge_algorithm = chooseMergeAlgorithm();
|
||||
|
29
tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh
Executable file
29
tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh
Executable file
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Regression test: a column whose TTL has fully expired must not be written
# back (with default values) by a later merge.
#
# Resolve the absolute directory containing this script so the shared test
# configuration can be sourced relative to it.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
# NOTE(review): shell_config.sh presumably defines CLICKHOUSE_LOCAL and
# CLICKHOUSE_TEST_UNIQUE_NAME used below - confirm against that file.
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
# Run the whole scenario in one clickhouse-local invocation whose data
# directory (--path) is named by $CLICKHOUSE_TEST_UNIQUE_NAME:
#   1. create a MergeTree table where `value` has a column TTL of
#      `date + interval 1 month`, with both wide-part thresholds set to 0;
#   2. insert one row dated 2010-01-01;
#   3. run `optimize ... final` twice.
# The SQL comments inside the string name the part each step is expected to
# produce (all_1_1_0 .. all_1_1_3).
$CLICKHOUSE_LOCAL --path "$CLICKHOUSE_TEST_UNIQUE_NAME" -nm -q "
|
||||
create table ttl_02335 (
|
||||
date Date,
|
||||
key Int,
|
||||
value String TTL date + interval 1 month
|
||||
)
|
||||
engine=MergeTree
|
||||
order by key
|
||||
settings
|
||||
min_bytes_for_wide_part=0,
|
||||
min_rows_for_wide_part=0;
|
||||
|
||||
-- all_1_1_0
|
||||
-- all_1_1_1
|
||||
insert into ttl_02335 values ('2010-01-01', 2010, 'foo');
|
||||
-- all_1_1_2
|
||||
optimize table ttl_02335 final;
|
||||
-- all_1_1_3
|
||||
optimize table ttl_02335 final;
|
||||
"
|
||||
|
||||
# The check itself: print a failure message only if part all_1_1_3 contains a
# data file for the `value` column. When the file is absent nothing is printed.
test -f "$CLICKHOUSE_TEST_UNIQUE_NAME"/data/_local/ttl_02335/all_1_1_3/value.bin && echo "[FAIL] value column should not exist"
|
||||
# `test -f` returns non-zero when the file is missing (the passing case), so
# force a zero exit status; the result is conveyed only through the output.
exit 0
|
Loading…
Reference in New Issue
Block a user