add setting remove_empty_parts

This commit is contained in:
Anton Popov 2020-11-15 05:24:47 +03:00
parent 2c0ab53492
commit de5ead0c40
8 changed files with 19 additions and 10 deletions

View File

@@ -1217,6 +1217,9 @@ void MergeTreeData::clearOldWriteAheadLogs()
void MergeTreeData::clearEmptyParts()
{
if (!getSettings()->remove_empty_parts)
return;
auto parts = getDataPartsVector();
for (const auto & part : parts)
{

View File

@@ -105,6 +105,7 @@ struct Settings;
M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
\
/** Settings for testing purposes */ \
M(Bool, randomize_part_type, false, "For testing purposes only. Randomizes part type between wide and compact", 0) \

View File

@@ -1,4 +1,4 @@
CREATE TABLE default.ttl\n(\n `d` Date,\n `a` Int32\n)\nENGINE = MergeTree\nPARTITION BY toDayOfMonth(d)\nORDER BY a\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192
CREATE TABLE default.ttl\n(\n `d` Date,\n `a` Int32\n)\nENGINE = MergeTree\nPARTITION BY toDayOfMonth(d)\nORDER BY a\nTTL d + toIntervalDay(1)\nSETTINGS remove_empty_parts = 0, index_granularity = 8192
2100-10-10 3
2100-10-10 4
d Date

View File

@@ -2,14 +2,13 @@ set send_logs_level = 'fatal';
drop table if exists ttl;
create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d);
create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) settings remove_empty_parts = 0;
alter table ttl modify ttl d + interval 1 day;
show create table ttl;
insert into ttl values (toDateTime('2000-10-10 00:00:00'), 1);
insert into ttl values (toDateTime('2000-10-10 00:00:00'), 2);
insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3);
insert into ttl values (toDateTime('2100-10-10 00:00:00'), 4);
select sleep(1) format Null; -- wait if very fast merge happen
optimize table ttl partition 10 final;
select * from ttl order by d;
@@ -18,7 +17,7 @@ alter table ttl modify ttl a; -- { serverError 450 }
drop table if exists ttl;
create table ttl (d Date, a Int) engine = MergeTree order by tuple() partition by toDayOfMonth(d);
create table ttl (d Date, a Int) engine = MergeTree order by tuple() partition by toDayOfMonth(d) settings remove_empty_parts = 0;
alter table ttl modify column a Int ttl d + interval 1 day;
desc table ttl;
alter table ttl modify column d Int ttl d + interval 1 day; -- { serverError 43 }

View File

@@ -11,7 +11,9 @@ select a, b from ttl_00933_1;
drop table if exists ttl_00933_1;
create table ttl_00933_1 (d DateTime, a Int, b Int) engine = MergeTree order by toDate(d) partition by tuple() ttl d + interval 1 second;
create table ttl_00933_1 (d DateTime, a Int, b Int)
engine = MergeTree order by toDate(d) partition by tuple() ttl d + interval 1 second
settings remove_empty_parts = 0;
insert into ttl_00933_1 values (now(), 1, 2);
insert into ttl_00933_1 values (now(), 3, 4);
insert into ttl_00933_1 values (now() + 1000, 5, 6);
@@ -30,7 +32,9 @@ select * from ttl_00933_1 order by d;
drop table if exists ttl_00933_1;
create table ttl_00933_1 (d DateTime, a Int) engine = MergeTree order by tuple() partition by tuple() ttl d + interval 1 day;
create table ttl_00933_1 (d DateTime, a Int)
engine = MergeTree order by tuple() partition by tuple() ttl d + interval 1 day
settings remove_empty_parts = 0;
insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 1);
insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 2);
insert into ttl_00933_1 values (toDateTime('2100-10-10 00:00:00'), 3);
@@ -39,7 +43,9 @@ select * from ttl_00933_1 order by d;
drop table if exists ttl_00933_1;
create table ttl_00933_1 (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) ttl d + interval 1 day;
create table ttl_00933_1 (d Date, a Int)
engine = MergeTree order by a partition by toDayOfMonth(d) ttl d + interval 1 day
settings remove_empty_parts = 0;
insert into ttl_00933_1 values (toDate('2000-10-10'), 1);
insert into ttl_00933_1 values (toDate('2100-10-10'), 2);
optimize table ttl_00933_1 final;

View File

@@ -1,6 +1,6 @@
drop table if exists ttl;
create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d);
create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) settings remove_empty_parts = 0;
insert into ttl values (toDateTime('2000-10-10 00:00:00'), 1);
insert into ttl values (toDateTime('2000-10-10 00:00:00'), 2);
insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3);

View File

@@ -16,7 +16,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part
ENGINE = MergeTree()
ORDER BY id
PARTITION BY id
SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0
SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0
"

View File

@@ -1,6 +1,6 @@
DROP TABLE IF EXISTS column_size_bug;
CREATE TABLE column_size_bug (date_time DateTime, value SimpleAggregateFunction(sum,UInt64)) ENGINE = AggregatingMergeTree PARTITION BY toStartOfInterval(date_time, INTERVAL 1 DAY) ORDER BY (date_time);
CREATE TABLE column_size_bug (date_time DateTime, value SimpleAggregateFunction(sum,UInt64)) ENGINE = AggregatingMergeTree PARTITION BY toStartOfInterval(date_time, INTERVAL 1 DAY) ORDER BY (date_time) SETTINGS remove_empty_parts = 0;
INSERT INTO column_size_bug VALUES(now(),1);
INSERT INTO column_size_bug VALUES(now(),1);