From a2c040111c67b45c685a091e0dd617dc875593c6 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Fri, 17 May 2024 17:45:03 +0200 Subject: [PATCH 1/3] Improve ReplacingMergeTree is_deleted documentation --- .../mergetree-family/replacingmergetree.md | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index a6258bcd581..8ec34dea58e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -45,7 +45,7 @@ When merging, `ReplacingMergeTree` from all the rows with the same sorting key l - The last in the selection, if `ver` not set. A selection is a set of rows in a set of parts participating in the merge. The most recently created part (the last insert) will be the last one in the selection. Thus, after deduplication, the very last row from the most recent insert will remain for each unique sorting key. - With the maximum version, if `ver` specified. If `ver` is the same for several rows, then it will use "if `ver` is not specified" rule for them, i.e. the most recent inserted row will remain. -Example: +Example: ```sql -- without ver - the last inserted 'wins' @@ -90,14 +90,14 @@ SELECT * FROM mySecondReplacingMT FINAL; ### is_deleted -`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted“ row, `0` is a “state“ row. +`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a "deleted" row, `0` is a "state" row. Column data type — `UInt8`. :::note `is_deleted` can only be enabled when `ver` is used. -The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used. 
+The row is deleted only when `OPTIMIZE ... FINAL CLEANUP`. This `CLEANUP` special keywork is not allowed by default unless `allow_experimental_replacing_merge_with_cleanup` MergeTree setting is enabled. No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. @@ -114,25 +114,31 @@ CREATE OR REPLACE TABLE myThirdReplacingMT `is_deleted` UInt8 ) ENGINE = ReplacingMergeTree(eventTime, is_deleted) -ORDER BY key; +ORDER BY key +SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0); -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); select * from myThirdReplacingMT final; 0 rows in set. Elapsed: 0.003 sec. --- delete rows with is_deleted -OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; +-- A simple optimize + final does not delete rows with is_deleted +OPTIMIZE TABLE myThirdReplacingMT FINAL; -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); - -select * from myThirdReplacingMT final; +select * from myThirdReplacingMT; ┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ -│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ +│ 1 │ first │ 2020-01-01 01:01:01 │ 1 │ └─────┴─────────┴─────────────────────┴────────────┘ + +-- A cleanup optimize deletes rows with is_deleted +OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; + +select * from myThirdReplacingMT; + +0 rows in set. Elapsed: 0.002 sec. 
``` ## Query clauses From acba6fd7a20ad44b29d373b5b44b26675444eaa3 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Fri, 17 May 2024 17:57:24 +0200 Subject: [PATCH 2/3] Fix typo --- .../table-engines/mergetree-family/replacingmergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 8ec34dea58e..58fa2829a64 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -97,7 +97,7 @@ SELECT * FROM mySecondReplacingMT FINAL; :::note `is_deleted` can only be enabled when `ver` is used. -The row is deleted only when `OPTIMIZE ... FINAL CLEANUP`. This `CLEANUP` special keywork is not allowed by default unless `allow_experimental_replacing_merge_with_cleanup` MergeTree setting is enabled. +The row is deleted only when `OPTIMIZE ... FINAL CLEANUP` is used. The special `CLEANUP` keyword is not allowed by default unless the `allow_experimental_replacing_merge_with_cleanup` MergeTree setting is enabled. No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. 
From 16889ff0324bc607b8b376ee1abba7bb990b9b91 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Fri, 17 May 2024 18:03:51 +0200 Subject: [PATCH 3/3] Rollback doc example --- .../mergetree-family/replacingmergetree.md | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 58fa2829a64..5a0a2691a9e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -124,21 +124,16 @@ select * from myThirdReplacingMT final; 0 rows in set. Elapsed: 0.003 sec. --- A simple optimize + final does not delete rows with is_deleted -OPTIMIZE TABLE myThirdReplacingMT FINAL; - -select * from myThirdReplacingMT; - -┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ -│ 1 │ first │ 2020-01-01 01:01:01 │ 1 │ -└─────┴─────────┴─────────────────────┴────────────┘ - --- A cleanup optimize deletes rows with is_deleted +-- delete rows with is_deleted OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; -select * from myThirdReplacingMT; +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); -0 rows in set. Elapsed: 0.002 sec. +select * from myThirdReplacingMT final; + +┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ +│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ +└─────┴─────────┴─────────────────────┴────────────┘ ``` ## Query clauses