diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 6de818c130f..9467da33398 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -25,7 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[SETTINGS name=value, clean_deleted_rows=value, ...] +[SETTINGS name=value, ...] ``` For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). @@ -88,53 +88,6 @@ SELECT * FROM mySecondReplacingMT FINAL; └─────┴─────────┴─────────────────────┘ ``` -### is_deleted - -`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted“ row, `0` is a “state“ row. - - Column data type — `UInt8`. - -:::note -`is_deleted` can only be enabled when `ver` is used. - -The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`. - -No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. - -::: - -Example: -```sql --- with ver and is_deleted -CREATE OR REPLACE TABLE myThirdReplacingMT -( - `key` Int64, - `someCol` String, - `eventTime` DateTime, - `is_deleted` UInt8 -) -ENGINE = ReplacingMergeTree(eventTime, is_deleted) -ORDER BY key; - -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0); -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); - -select * from myThirdReplacingMT final; - -0 rows in set. Elapsed: 0.003 sec. - --- delete rows with is_deleted -OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; - -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); - -select * from myThirdReplacingMT final; - -┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ -│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ -└─────┴─────────┴─────────────────────┴────────────┘ -``` - ## Query clauses When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index c7e461d15ae..da049554c67 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -852,16 +852,6 @@ If the file name for column is too long (more than `max_file_name_length` bytes) The maximal length of the file name to keep it as is without hashing. Takes effect only if setting `replace_long_file_name_to_hash` is enabled. The value of this setting does not include the length of file extension. So, it is recommended to set it below the maximum filename length (usually 255 bytes) with some gap to avoid filesystem errors. Default value: 127. -## clean_deleted_rows - -Enable/disable automatic deletion of rows flagged as `is_deleted` when perform `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to the `OPTIMIZE ... FINAL` to have the same behaviour. - -Possible values: - -- `Always` or `Never`. - -Default value: `Never` - ## allow_experimental_block_number_column Persists virtual column `_block_number` on merges. diff --git a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md index e8089b2c42b..c17e7982b98 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md @@ -86,59 +86,6 @@ SELECT * FROM mySecondReplacingMT FINAL; │ 1 │ first │ 2020-01-01 01:01:01 │ └─────┴─────────┴─────────────────────┘ ``` -### is_deleted - -`is_deleted` — Имя столбца, который используется во время слияния для обозначения того, нужно ли отображать строку или она подлежит удалению; `1` - для удаления строки, `0` - для отображения строки. - - Тип данных столбца — `UInt8`. - -:::note -`is_deleted` может быть использован, если `ver` используется. - -Строка удаляется в следующих случаях: - - - при использовании инструкции `OPTIMIZE ... FINAL CLEANUP` - - при использовании инструкции `OPTIMIZE ... FINAL` - - параметр движка `clean_deleted_rows` установлен в значение `Always` (по умолчанию - `Never`) - - есть новые версии строки - -Не рекомендуется выполнять `FINAL CLEANUP` или использовать параметр движка `clean_deleted_rows` со значением `Always`, это может привести к неожиданным результатам, например удаленные строки могут вновь появиться. - -Вне зависимости от производимых изменений над данными, версия должна увеличиваться. Если у двух строк одна и та же версия, то остается только последняя вставленная строка. -::: - -Пример: - -```sql --- with ver and is_deleted -CREATE OR REPLACE TABLE myThirdReplacingMT -( - `key` Int64, - `someCol` String, - `eventTime` DateTime, - `is_deleted` UInt8 -) -ENGINE = ReplacingMergeTree(eventTime, is_deleted) -ORDER BY key; - -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0); -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); - -select * from myThirdReplacingMT final; - -0 rows in set. Elapsed: 0.003 sec. - --- delete rows with is_deleted -OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; - -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); - -select * from myThirdReplacingMT final; - -┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ -│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ -└─────┴─────────┴─────────────────────┴────────────┘ -``` ## Секции запроса diff --git a/programs/server/config.d/graphite_alternative.xml b/programs/server/config.d/graphite_alternative.xml new file mode 120000 index 00000000000..400b9e75f1f --- /dev/null +++ b/programs/server/config.d/graphite_alternative.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/graphite_alternative.xml \ No newline at end of file diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index ee113a6776f..c35e69977ed 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -98,8 +98,6 @@ IMPLEMENT_SETTING_AUTO_ENUM(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUMENTS) IMPLEMENT_SETTING_AUTO_ENUM(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS) -IMPLEMENT_SETTING_AUTO_ENUM(CleanDeletedRows, ErrorCodes::BAD_ARGUMENTS) - IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL, {{"decimal", MySQLDataTypesSupport::DECIMAL}, {"datetime64", MySQLDataTypesSupport::DATETIME64}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 7977a0b3ab6..2e71c96b954 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -140,14 +140,6 @@ enum class DefaultTableEngine DECLARE_SETTING_ENUM(DefaultTableEngine) -enum class CleanDeletedRows -{ - Never = 0, /// Disable. - Always, -}; - -DECLARE_SETTING_ENUM(CleanDeletedRows) - enum class MySQLDataTypesSupport { DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e376ab5f0bf..25146ebc10d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -33,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -45,7 +43,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp index ae456e8b31d..6be78deb897 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -79,7 +79,7 @@ BlockIO InterpreterOptimizeQuery::execute() if (auto * snapshot_data = dynamic_cast(storage_snapshot->data.get())) snapshot_data->parts = {}; - table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, ast.cleanup, getContext()); + table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, getContext()); return {}; } diff --git a/src/Parsers/ASTOptimizeQuery.cpp b/src/Parsers/ASTOptimizeQuery.cpp index 173310f7930..720c7699fb6 100644 --- a/src/Parsers/ASTOptimizeQuery.cpp +++ b/src/Parsers/ASTOptimizeQuery.cpp @@ -24,9 +24,6 @@ void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatSt if (deduplicate) settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : ""); - if (cleanup) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " CLEANUP" << (settings.hilite ? hilite_none : ""); - if (deduplicate_by_columns) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " BY " << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTOptimizeQuery.h b/src/Parsers/ASTOptimizeQuery.h index 4c914c11912..584b2f38fe6 100644 --- a/src/Parsers/ASTOptimizeQuery.h +++ b/src/Parsers/ASTOptimizeQuery.h @@ -21,12 +21,10 @@ public: bool deduplicate = false; /// Deduplicate by columns. ASTPtr deduplicate_by_columns; - /// Delete 'is_deleted' data - bool cleanup = false; /** Get the text that identifies this element. */ String getID(char delim) const override { - return "OptimizeQuery" + (delim + getDatabase()) + delim + getTable() + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : "")+ (cleanup ? "_cleanup" : ""); + return "OptimizeQuery" + (delim + getDatabase()) + delim + getTable() + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); } ASTPtr clone() const override diff --git a/src/Parsers/ParserOptimizeQuery.cpp b/src/Parsers/ParserOptimizeQuery.cpp index e887ff445d2..5d3b196caf8 100644 --- a/src/Parsers/ParserOptimizeQuery.cpp +++ b/src/Parsers/ParserOptimizeQuery.cpp @@ -39,7 +39,6 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ASTPtr partition; bool final = false; bool deduplicate = false; - bool cleanup = false; String cluster_str; if (!s_optimize_table.ignore(pos, expected)) @@ -70,9 +69,6 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (s_deduplicate.ignore(pos, expected)) deduplicate = true; - if (s_cleanup.ignore(pos, expected)) - cleanup = true; - ASTPtr deduplicate_by_columns; if (deduplicate && s_by.ignore(pos, expected)) { @@ -81,6 +77,9 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return false; } + /// Obsolete feature, ignored for backward compatibility. + s_cleanup.ignore(pos, expected); + auto query = std::make_shared(); node = query; @@ -90,7 +89,6 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte query->final = final; query->deduplicate = deduplicate; query->deduplicate_by_columns = deduplicate_by_columns; - query->cleanup = cleanup; query->database = database; query->table = table; diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index db770de858c..139ccd815d2 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -3,30 +3,22 @@ #include #include + namespace DB { -namespace ErrorCodes -{ - extern const int INCORRECT_DATA; -} - ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( const Block & header_, size_t num_inputs, SortDescription description_, - const String & is_deleted_column, const String & version_column, size_t max_block_size_rows, size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_, - bool use_average_block_sizes, - bool cleanup_) + bool use_average_block_sizes) : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_) + , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes) { - if (!is_deleted_column.empty()) - is_deleted_column_number = header_.getPositionByName(is_deleted_column); if (!version_column.empty()) version_column_number = header_.getPositionByName(version_column); } @@ -73,15 +65,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() /// Write the data for the previous primary key. if (!selected_row.empty()) - { - if (is_deleted_column_number!=-1) - { - if (!(cleanup && assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num])) - insertRow(); - } - else - insertRow(); - } + insertRow(); selected_row.clear(); } @@ -91,13 +75,6 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() if (out_row_sources_buf) current_row_sources.emplace_back(current.impl->order, true); - if ((is_deleted_column_number!=-1)) - { - const UInt8 is_deleted = assert_cast(*current->all_columns[is_deleted_column_number]).getData()[current->getRow()]; - if ((is_deleted != 1) && (is_deleted != 0)) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect data: is_deleted = {} (must be 1 or 0).", toString(is_deleted)); - } - /// A non-strict comparison, since we select the last row for the same version values. if (version_column_number == -1 || selected_row.empty() @@ -128,15 +105,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() /// We will write the data for the last primary key. if (!selected_row.empty()) - { - if (is_deleted_column_number!=-1) - { - if (!(cleanup && assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num])) - insertRow(); - } - else - insertRow(); - } + insertRow(); return Status(merged_data.pull(), true); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index d57bab4708c..2295d1c35d1 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -21,13 +21,11 @@ public: ReplacingSortedAlgorithm( const Block & header, size_t num_inputs, SortDescription description_, - const String & is_deleted_column, const String & version_column, size_t max_block_size_rows, size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, - bool use_average_block_sizes = false, - bool cleanup = false); + bool use_average_block_sizes = false); const char * getName() const override { return "ReplacingSortedAlgorithm"; } Status merge() override; @@ -35,9 +33,7 @@ public: private: MergedData merged_data; - ssize_t is_deleted_column_number = -1; ssize_t version_column_number = -1; - bool cleanup = false; using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 2; /// last, current. diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 9cd2f29a862..8d25d153cb4 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -14,24 +14,21 @@ public: ReplacingSortedTransform( const Block & header, size_t num_inputs, SortDescription description_, - const String & is_deleted_column, const String & version_column, + const String & version_column, size_t max_block_size_rows, size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, - bool use_average_block_sizes = false, - bool cleanup = false) + bool use_average_block_sizes = false) : IMergingTransform( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), - is_deleted_column, version_column, max_block_size_rows, max_block_size_bytes, out_row_sources_buf_, - use_average_block_sizes, - cleanup) + use_average_block_sizes) { } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f7d8ff9cd29..3e540af757c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1025,7 +1025,7 @@ static void addMergingFinal( case MergeTreeData::MergingParams::Replacing: return std::make_shared(header, num_outputs, - sort_description, merging_params.is_deleted_column, merging_params.version_column, max_block_size_rows, /*max_block_size_bytes=*/0, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false, /*cleanup*/ !merging_params.is_deleted_column.empty()); + sort_description, merging_params.version_column, max_block_size_rows, /*max_block_size_bytes=*/0, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false); case MergeTreeData::MergingParams::VersionedCollapsing: return std::make_shared(header, num_outputs, @@ -1128,8 +1128,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// can use parallel select on such parts. bool no_merging_final = do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && - parts_to_merge_ranges[range_index]->data_part->info.level > 0 && - data.merging_params.is_deleted_column.empty(); + parts_to_merge_ranges[range_index]->data_part->info.level > 0; if (no_merging_final) { @@ -1839,8 +1838,6 @@ Pipe ReadFromMergeTree::spreadMarkRanges( } } - if (!data.merging_params.is_deleted_column.empty() && !names.contains(data.merging_params.is_deleted_column)) - column_names_to_read.push_back(data.merging_params.is_deleted_column); if (!data.merging_params.sign_column.empty() && !names.contains(data.merging_params.sign_column)) column_names_to_read.push_back(data.merging_params.sign_column); if (!data.merging_params.version_column.empty() && !names.contains(data.merging_params.version_column)) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 1102c77ca58..ac30b293d4a 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -515,7 +515,6 @@ public: bool /*final*/, bool /*deduplicate*/, const Names & /* deduplicate_by_columns */, - bool /*cleanup*/, ContextPtr /*context*/) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method optimize is not supported by storage {}", getName()); diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 3d8bc62b5cc..9be31859a19 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -312,7 +312,6 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() reserved_space, entry.deduplicate, entry.deduplicate_by_columns, - entry.cleanup, storage.merging_params, NO_TRANSACTION_PTR); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index c218acce903..aed9f70d216 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -131,7 +131,6 @@ void MergePlainMergeTreeTask::prepare() merge_mutate_entry->tagger->reserved_space, deduplicate, deduplicate_by_columns, - cleanup, storage.merging_params, txn); } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 5cc9c0e50d3..2c93f9c9e2c 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -20,7 +20,6 @@ public: StorageMetadataPtr metadata_snapshot_, bool deduplicate_, Names deduplicate_by_columns_, - bool cleanup_, MergeMutateSelectedEntryPtr merge_mutate_entry_, TableLockHolder table_lock_holder_, IExecutableTask::TaskResultCallback & task_result_callback_) @@ -28,7 +27,6 @@ public: , metadata_snapshot(std::move(metadata_snapshot_)) , deduplicate(deduplicate_) , deduplicate_by_columns(std::move(deduplicate_by_columns_)) - , cleanup(cleanup_) , merge_mutate_entry(std::move(merge_mutate_entry_)) , table_lock_holder(std::move(table_lock_holder_)) , task_result_callback(task_result_callback_) @@ -69,7 +67,6 @@ private: StorageMetadataPtr metadata_snapshot; bool deduplicate; Names deduplicate_by_columns; - bool cleanup; MergeMutateSelectedEntryPtr merge_mutate_entry{nullptr}; TableLockHolder table_lock_holder; FutureMergedMutatedPartPtr future_part{nullptr}; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 786960beb37..a8b657d0e3e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -42,7 +42,6 @@ namespace ErrorCodes extern const int ABORTED; extern const int DIRECTORY_ALREADY_EXISTS; extern const int LOGICAL_ERROR; - extern const int SUPPORT_IS_DISABLED; } @@ -70,10 +69,7 @@ static void extractMergingAndGatheringColumns( /// Force version column for Replacing mode if (merging_params.mode == MergeTreeData::MergingParams::Replacing) - { - key_columns.emplace(merging_params.is_deleted_column); key_columns.emplace(merging_params.version_column); - } /// Force sign column for VersionedCollapsing mode. Version is already in primary key. if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) @@ -510,12 +506,13 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const /// In special case, when there is only one source part, and no rows were skipped, we may have /// skipped writing rows_sources file. Otherwise rows_sources_count must be equal to the total /// number of input rows. - if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) && sum_input_rows_exact != rows_sources_count + input_rows_filtered) + if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) + && sum_input_rows_exact != rows_sources_count + input_rows_filtered) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number " - "of bytes written to rows_sources file ({}). It is a bug.", - sum_input_rows_exact, input_rows_filtered, rows_sources_count); + ErrorCodes::LOGICAL_ERROR, + "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number " + "of bytes written to rows_sources file ({}). It is a bug.", + sum_input_rows_exact, input_rows_filtered, rows_sources_count); /// TemporaryDataOnDisk::createRawStream returns WriteBufferFromFile implementing IReadableWriteBuffer /// and we expect to get ReadBufferFromFile here. @@ -759,7 +756,6 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c global_ctx->space_reservation, global_ctx->deduplicate, global_ctx->deduplicate_by_columns, - global_ctx->cleanup, projection_merging_params, global_ctx->need_prefix, global_ctx->new_data_part.get(), @@ -1023,13 +1019,9 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() break; case MergeTreeData::MergingParams::Replacing: - if (global_ctx->cleanup && !data_settings->allow_experimental_replacing_merge_with_cleanup) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed"); - merged_transform = std::make_shared( - header, pipes.size(), sort_description, ctx->merging_params.is_deleted_column, ctx->merging_params.version_column, - merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size, - global_ctx->cleanup); + header, pipes.size(), sort_description, ctx->merging_params.version_column, + merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size); break; case MergeTreeData::MergingParams::Graphite: @@ -1118,8 +1110,6 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm return MergeAlgorithm::Horizontal; if (global_ctx->future_part->part_format.storage_type != MergeTreeDataPartStorageType::Full) return MergeAlgorithm::Horizontal; - if (global_ctx->cleanup) - return MergeAlgorithm::Horizontal; if (!data_settings->allow_vertical_merges_from_compact_to_wide_parts) { diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index b2a5796737d..aeede44fe88 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -67,7 +67,6 @@ public: ReservationSharedPtr space_reservation_, bool deduplicate_, Names deduplicate_by_columns_, - bool cleanup_, MergeTreeData::MergingParams merging_params_, bool need_prefix, IMergeTreeDataPart * parent_part_, @@ -91,7 +90,6 @@ public: global_ctx->space_reservation = std::move(space_reservation_); global_ctx->deduplicate = std::move(deduplicate_); global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_); - global_ctx->cleanup = std::move(cleanup_); global_ctx->parent_part = std::move(parent_part_); global_ctx->data = std::move(data_); global_ctx->mutator = std::move(mutator_); @@ -160,7 +158,6 @@ private: ReservationSharedPtr space_reservation{nullptr}; bool deduplicate{false}; Names deduplicate_by_columns{}; - bool cleanup{false}; NamesAndTypesList gathering_columns{}; NamesAndTypesList merging_columns{}; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 67337314cac..450bf10bdcb 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -846,10 +846,6 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat { const auto columns = metadata.getColumns().getAllPhysical(); - if (!is_deleted_column.empty() && mode != MergingParams::Replacing) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "is_deleted column for MergeTree cannot be specified in modes except Replacing."); - if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing) throw Exception(ErrorCodes::LOGICAL_ERROR, "Sign column for MergeTree cannot be specified " @@ -919,41 +915,6 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Version column {} does not exist in table declaration.", version_column); }; - /// Check that if the is_deleted column is needed, it exists and is of type UInt8. If exist, version column must be defined too but version checks are not done here. - auto check_is_deleted_column = [this, & columns](bool is_optional, const std::string & storage) - { - if (is_deleted_column.empty()) - { - if (is_optional) - return; - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: is_deleted ({}) column for storage {} is empty", is_deleted_column, storage); - } - else - { - if (version_column.empty() && !is_optional) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Version column ({}) for storage {} is empty while is_deleted ({}) is not.", - version_column, storage, is_deleted_column); - - bool miss_is_deleted_column = true; - for (const auto & column : columns) - { - if (column.name == is_deleted_column) - { - if (!typeid_cast(column.type.get())) - throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "is_deleted column ({}) for storage {} must have type UInt8. Provided column of type {}.", - is_deleted_column, storage, column.type->getName()); - miss_is_deleted_column = false; - break; - } - } - - if (miss_is_deleted_column) - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "is_deleted column {} does not exist in table declaration.", is_deleted_column); - } - }; - - if (mode == MergingParams::Collapsing) check_sign_column(false, "CollapsingMergeTree"); @@ -990,7 +951,6 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (mode == MergingParams::Replacing) { - check_is_deleted_column(true, "ReplacingMergeTree"); check_version_column(true, "ReplacingMergeTree"); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index dfa13eca11d..ab3a641e37a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -349,9 +349,6 @@ public: /// For Collapsing and VersionedCollapsing mode. String sign_column; - /// For Replacing mode. Can be empty for Replacing. - String is_deleted_column; - /// For Summing mode. If empty - columns_to_sum is determined automatically. Names columns_to_sum; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index f78b383e173..42f480ed18a 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -676,7 +676,6 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( ReservationSharedPtr space_reservation, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, bool need_prefix, @@ -693,7 +692,6 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( space_reservation, deduplicate, deduplicate_by_columns, - cleanup, merging_params, need_prefix, parent_part, diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 6eab0ee0c37..5e8a89c94a4 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -165,7 +165,6 @@ public: ReservationSharedPtr space_reservation, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, bool need_prefix = true, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2a381afa805..f63394a4d48 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -325,7 +325,7 @@ Block MergeTreeDataWriter::mergeBlock( return nullptr; case MergeTreeData::MergingParams::Replacing: return std::make_shared( - block, 1, sort_description, merging_params.is_deleted_column, merging_params.version_column, block_size + 1, /*block_size_bytes=*/0); + block, 1, sort_description, merging_params.version_column, block_size + 1, /*block_size_bytes=*/0); case MergeTreeData::MergingParams::Collapsing: return std::make_shared( block, 1, sort_description, merging_params.sign_column, diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index eb6c14d7754..106e66d8a99 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -192,7 +192,6 @@ struct Settings; M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ M(Bool, allow_experimental_block_number_column, false, "Enable persisting column _block_number for each row.", 0) \ - M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ \ /** Compress marks and primary key. */ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ @@ -233,7 +232,7 @@ struct Settings; MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_send_timeout, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_receive_timeout, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) \ - MAKE_OBSOLETE_MERGE_TREE_SETTING(M, CleanDeletedRows, clean_deleted_rows, CleanDeletedRows::Never) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, String, clean_deleted_rows, "") \ /// Settings that should not change after the creation of a table. /// NOLINTNEXTLINE diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index e4070aa8262..8c896edab14 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1057,7 +1057,6 @@ public: ctx->space_reservation, false, // TODO Do we need deduplicate for projections {}, - false, // no cleanup projection_merging_params, NO_TRANSACTION_PTR, /* need_prefix */ true, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 9eb8b6ce24c..fc924d1f80c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -96,9 +96,6 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const } } - if (cleanup) - out << "\ncleanup: " << cleanup; - break; case DROP_RANGE: @@ -273,7 +270,11 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor deduplicate_by_columns = std::move(new_deduplicate_by_columns); } else if (checkString("cleanup: ", in)) + { + /// Obsolete option, does nothing. + bool cleanup = false; in >> cleanup; + } else trailing_newline_found = true; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 0ce59b18818..4821a80a29b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -98,7 +98,6 @@ struct ReplicatedMergeTreeLogEntryData Strings source_parts; bool deduplicate = false; /// Do deduplicate on merge Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default). - bool cleanup = false; MergeType merge_type = MergeType::Regular; String column_name; String index_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 41188891118..eec5454f9a7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -52,7 +52,6 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr index_granularity = data_settings->index_granularity; merging_params_mode = static_cast(data.merging_params.mode); sign_column = data.merging_params.sign_column; - is_deleted_column = data.merging_params.is_deleted_column; columns_to_sum = fmt::format("{}", fmt::join(data.merging_params.columns_to_sum.begin(), data.merging_params.columns_to_sum.end(), ",")); version_column = data.merging_params.version_column; if (data.merging_params.mode == MergeTreeData::MergingParams::Graphite) @@ -157,8 +156,6 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const out << "merge parameters format version: " << merge_params_version << "\n"; if (!version_column.empty()) out << "version column: " << version_column << "\n"; - if (!is_deleted_column.empty()) - out << "is_deleted column: " << is_deleted_column << "\n"; if (!columns_to_sum.empty()) out << "columns to sum: " << columns_to_sum << "\n"; if (!graphite_params_hash.empty()) @@ -224,9 +221,6 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("version column: ", in)) in >> version_column >> "\n"; - if (checkString("is_deleted column: ", in)) - in >> is_deleted_column >> "\n"; - if (checkString("columns to sum: ", in)) in >> columns_to_sum >> "\n"; @@ -279,10 +273,6 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in version column. " "Stored in ZooKeeper: {}, local: {}", from_zk.version_column, version_column); - if (is_deleted_column != from_zk.is_deleted_column) - throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in is_deleted column. " - "Stored in ZooKeeper: {}, local: {}", from_zk.is_deleted_column, is_deleted_column); - if (columns_to_sum != from_zk.columns_to_sum) throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sum columns. " "Stored in ZooKeeper: {}, local: {}", from_zk.columns_to_sum, columns_to_sum); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 15ed8671f9b..67de9fd64ba 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -29,7 +29,6 @@ struct ReplicatedMergeTreeTableMetadata int merge_params_version = REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS; String sign_column; String version_column; - String is_deleted_column; String columns_to_sum; String graphite_params_hash; String primary_key; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9a5af77d57c..9ed87e5c9ef 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -138,7 +138,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) * CollapsingMergeTree(date, [sample_key], primary_key, index_granularity, sign) * SummingMergeTree(date, [sample_key], primary_key, index_granularity, [columns_to_sum]) * AggregatingMergeTree(date, [sample_key], primary_key, index_granularity) - * ReplacingMergeTree(date, [sample_key], primary_key, index_granularity, [version_column [, is_deleted_column]]) + * ReplacingMergeTree(date, [sample_key], primary_key, index_granularity, [version_column]) * GraphiteMergeTree(date, [sample_key], primary_key, index_granularity, 'config_element') * * Alternatively, you can specify: @@ -441,15 +441,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) } else if (merging_params.mode == MergeTreeData::MergingParams::Replacing) { - // if there is args and number of optional parameter is higher than 1 - // is_deleted is not allowed with the 'allow_deprecated_syntax_for_merge_tree' settings - if (arg_cnt - arg_num == 2 && !engine_args[arg_cnt - 1]->as() && is_extended_storage_def) - { - if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.is_deleted_column)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "is_deleted column name must be an identifier {}", verbose_help_message); - --arg_cnt; - } - /// If the last element is not index_granularity or replica_name (a literal), then this is the name of the version column. if (arg_cnt && !engine_args[arg_cnt - 1]->as()) { diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 4ead714c740..c9843211e08 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -321,7 +321,6 @@ bool StorageEmbeddedRocksDB::optimize( bool final, bool deduplicate, const Names & /* deduplicate_by_columns */, - bool cleanup, ContextPtr /*context*/) { if (partition) @@ -333,9 +332,6 @@ bool StorageEmbeddedRocksDB::optimize( if (deduplicate) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type EmbeddedRocksDB"); - if (cleanup) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CLEANUP cannot be specified when optimizing table of type EmbeddedRocksDB"); - std::shared_lock lock(rocksdb_ptr_mx); rocksdb::CompactRangeOptions compact_options; auto status = rocksdb_ptr->CompactRange(compact_options, nullptr, nullptr); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index b09dfca7338..f2112641234 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -65,7 +65,6 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr context) override; bool supportsParallelInsert() const override { return true; } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index ba5d922dc86..943bf0eb801 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -685,7 +685,7 @@ void StorageBuffer::flushAndPrepareForShutdown() try { - optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, false /*cleanup*/, getContext()); + optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, getContext()); } catch (...) { @@ -711,7 +711,6 @@ bool StorageBuffer::optimize( bool final, bool deduplicate, const Names & /* deduplicate_by_columns */, - bool cleanup, ContextPtr /*context*/) { if (partition) @@ -723,9 +722,6 @@ bool StorageBuffer::optimize( if (deduplicate) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type Buffer"); - if (cleanup) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CLEANUP cannot be specified when optimizing table of type Buffer"); - flushAllBuffers(false); return true; } @@ -1058,7 +1054,7 @@ void StorageBuffer::alter(const AlterCommands & params, ContextPtr local_context auto metadata_snapshot = getInMemoryMetadataPtr(); /// Flush buffers to the storage because BufferSource skips buffers with old metadata_version. - optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, {}, false /*cleanup*/, local_context); + optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, {}, local_context); StorageInMemoryMetadata new_metadata = *metadata_snapshot; params.apply(new_metadata, local_context); diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index ef646a12548..2610cf79989 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -100,7 +100,6 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr context) override; bool supportsSampling() const override { return true; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 0d958d20f49..2339fd11cf8 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -262,13 +262,12 @@ bool StorageMaterializedView::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr local_context) { checkStatementCanBeForwarded(); auto storage_ptr = getTargetTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); + return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); } void StorageMaterializedView::alter( diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 84e0c3d0b5b..f37abdfb1a3 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -47,7 +47,6 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr context) override; void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e7ca50f4a5c..9378aaa1f6a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -62,7 +62,6 @@ namespace ErrorCodes extern const int UNKNOWN_POLICY; extern const int NO_SUCH_DATA_PART; extern const int ABORTED; - extern const int SUPPORT_IS_DISABLED; } namespace ActionLocks @@ -1096,7 +1095,6 @@ bool StorageMergeTree::merge( bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, const MergeTreeTransactionPtr & txn, String & out_disable_reason, bool optimize_skip_merged_partitions) @@ -1136,7 +1134,7 @@ bool StorageMergeTree::merge( /// Copying a vector of columns `deduplicate by columns. IExecutableTask::TaskResultCallback f = [](bool) {}; auto task = std::make_shared( - *this, metadata_snapshot, deduplicate, deduplicate_by_columns, cleanup, merge_mutate_entry, table_lock_holder, f); + *this, metadata_snapshot, deduplicate, deduplicate_by_columns, merge_mutate_entry, table_lock_holder, f); task->setCurrentTransaction(MergeTreeTransactionHolder{}, MergeTreeTransactionPtr{txn}); @@ -1374,7 +1372,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merge_entry) { - auto task = std::make_shared(*this, metadata_snapshot, /* deduplicate */ false, Names{}, /* cleanup */ false, merge_entry, shared_lock, common_assignee_trigger); + auto task = std::make_shared(*this, metadata_snapshot, /* deduplicate */ false, Names{}, merge_entry, shared_lock, common_assignee_trigger); task->setCurrentTransaction(std::move(transaction_for_merge), std::move(txn)); bool scheduled = assignee.scheduleMergeMutateTask(task); /// The problem that we already booked a slot for TTL merge, but a merge list entry will be created only in a prepare method @@ -1508,7 +1506,6 @@ bool StorageMergeTree::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr local_context) { if (deduplicate) @@ -1524,16 +1521,6 @@ bool StorageMergeTree::optimize( String disable_reason; if (!partition && final) { - if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) - { - constexpr const char * message = "Cannot OPTIMIZE with CLEANUP table: {}"; - disable_reason = "only ReplacingMergeTree can be CLEANUP"; - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); - } - - if (cleanup && !getSettings()->allow_experimental_replacing_merge_with_cleanup) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed"); - DataPartsVector data_parts = getVisibleDataPartsVector(local_context); std::unordered_set partition_ids; @@ -1548,7 +1535,6 @@ bool StorageMergeTree::optimize( true, deduplicate, deduplicate_by_columns, - cleanup, txn, disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) @@ -1576,7 +1562,6 @@ bool StorageMergeTree::optimize( final, deduplicate, deduplicate_by_columns, - cleanup, txn, disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index b2829ecb17f..f4dc52659b1 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -81,7 +81,6 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr context) override; void mutate(const MutationCommands & commands, ContextPtr context) override; @@ -170,14 +169,13 @@ private: * Returns true if merge is finished successfully. */ bool merge( - bool aggressive, - const String & partition_id, - bool final, bool deduplicate, - const Names & deduplicate_by_columns, - bool cleanup, - const MergeTreeTransactionPtr & txn, - String & out_disable_reason, - bool optimize_skip_merged_partitions = false); + bool aggressive, + const String & partition_id, + bool final, bool deduplicate, + const Names & deduplicate_by_columns, + const MergeTreeTransactionPtr & txn, + String & out_disable_reason, + bool optimize_skip_merged_partitions = false); void renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction); diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 269ddf57fa2..8fbc1313528 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -121,16 +121,15 @@ public: } bool optimize( - const ASTPtr & query, - const StorageMetadataPtr & metadata_snapshot, - const ASTPtr & partition, - bool final, - bool deduplicate, - const Names & deduplicate_by_columns, - bool cleanup, - ContextPtr context) override + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Names & deduplicate_by_columns, + ContextPtr context) override { - return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, context); + return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, context); } void mutate(const MutationCommands & commands, ContextPtr context) override { getNested()->mutate(commands, context); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index eefcab01236..f143a2ec78b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3730,12 +3730,10 @@ void StorageReplicatedMergeTree::mergeSelectingTask() future_merged_part->part_format, deduplicate, deduplicate_by_columns, - /*cleanup*/ false, nullptr, merge_pred->getVersion(), future_merged_part->merge_type); - if (create_result == CreateMergeEntryResult::Ok) return AttemptStatus::EntryCreated; if (create_result == CreateMergeEntryResult::LogUpdated) @@ -3852,7 +3850,6 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c const MergeTreeDataPartFormat & merged_part_format, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ReplicatedMergeTreeLogEntryData * out_log_entry, int32_t log_version, MergeType merge_type) @@ -3892,7 +3889,6 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c entry.merge_type = merge_type; entry.deduplicate = deduplicate; entry.deduplicate_by_columns = deduplicate_by_columns; - entry.cleanup = cleanup; entry.create_time = time(nullptr); for (const auto & part : parts) @@ -5627,7 +5623,6 @@ bool StorageReplicatedMergeTree::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr query_context) { /// NOTE: exclusive lock cannot be used here, since this may lead to deadlock (see comments below), @@ -5639,13 +5634,6 @@ bool StorageReplicatedMergeTree::optimize( if (!is_leader) throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader"); - if (cleanup) - { - if (!getSettings()->allow_experimental_replacing_merge_with_cleanup) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed"); - LOG_DEBUG(log, "Cleanup the ReplicatedMergeTree."); - } - auto handle_noop = [&](FormatStringHelper fmt_string, Args && ...args) { PreformattedMessage message = fmt_string.format(std::forward(args)...); @@ -5724,7 +5712,6 @@ bool StorageReplicatedMergeTree::optimize( future_merged_part->uuid, future_merged_part->part_format, deduplicate, deduplicate_by_columns, - cleanup, &merge_entry, can_merge.getVersion(), future_merged_part->merge_type); @@ -5749,13 +5736,6 @@ bool StorageReplicatedMergeTree::optimize( bool assigned = false; if (!partition && final) { - if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) - { - constexpr const char * message = "Cannot OPTIMIZE with CLEANUP table: {}"; - String disable_reason = "only ReplacingMergeTree can be CLEANUP"; - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); - } - DataPartsVector data_parts = getVisibleDataPartsVector(query_context); std::unordered_set partition_ids; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 556d23d6903..bb2cc04411a 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -178,7 +178,6 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr query_context) override; void alter(const AlterCommands & commands, ContextPtr query_context, AlterLockHolder & table_lock_holder) override; @@ -746,7 +745,6 @@ private: const MergeTreeDataPartFormat & merged_part_format, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ReplicatedMergeTreeLogEntryData * out_log_entry, int32_t log_version, MergeType merge_type); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 46c38ffa129..3eff3f9f995 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -435,12 +435,11 @@ bool StorageWindowView::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr local_context) { auto storage_ptr = getInnerTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); + return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); } void StorageWindowView::alter( diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index de8f880c602..d2484ae8ebf 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -134,7 +134,6 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, - bool cleanup, ContextPtr context) override; void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference index 6bac6173183..a4d91178d73 100644 --- a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference +++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference @@ -3,5 +3,7 @@ 2018-01-01 2 2 2018-01-01 2 2 == (Replicas) Test optimize == +d1 2 1 d2 1 0 +d3 2 1 d4 1 0 diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql index 871f96bb019..9e293d0f7e2 100644 --- a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql +++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql @@ -3,28 +3,28 @@ set optimize_on_insert = 0; drop table if exists tab_00577; create table tab_00577 (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 0, - min_bytes_for_wide_part = 0, allow_experimental_replacing_merge_with_cleanup=1; + min_bytes_for_wide_part = 0; insert into tab_00577 values ('2018-01-01', 2, 2), ('2018-01-01', 1, 1); insert into tab_00577 values ('2018-01-01', 0, 0); select * from tab_00577 order by version; -OPTIMIZE TABLE tab_00577 FINAL CLEANUP; +OPTIMIZE TABLE tab_00577 FINAL; select * from tab_00577; drop table tab_00577; DROP TABLE IF EXISTS testCleanupR1; CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8) - ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted) + ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version) ORDER BY uid SETTINGS enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 0, - min_bytes_for_wide_part = 0, allow_experimental_replacing_merge_with_cleanup=1; + min_bytes_for_wide_part = 0; INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0); INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1); INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1); SYSTEM SYNC REPLICA testCleanupR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet" -OPTIMIZE TABLE testCleanupR1 FINAL CLEANUP; +OPTIMIZE TABLE testCleanupR1 FINAL; -- Only d3 to d5 remain SELECT '== (Replicas) Test optimize =='; SELECT * FROM testCleanupR1 order by uid; -DROP TABLE IF EXISTS testCleanupR1 \ No newline at end of file +DROP TABLE IF EXISTS testCleanupR1 diff --git a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference deleted file mode 100644 index c897004b4e3..00000000000 --- a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference +++ /dev/null @@ -1,121 +0,0 @@ -== Test SELECT ... FINAL - no is_deleted == -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d6 2 1 -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d6 2 1 -== Test SELECT ... FINAL - no is_deleted SETTINGS clean_deleted_rows=Always == -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d6 2 1 -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d6 2 1 -== Test SELECT ... FINAL == -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d6 2 1 -== Insert backups == -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -== Insert a second batch with overlaping data == -d1 5 0 -d2 3 0 -d3 3 0 -d4 3 0 -d5 1 0 -== Only last version remains after OPTIMIZE W/ CLEANUP == -d1 5 0 -d2 1 0 -d3 1 0 -d4 1 0 -d5 1 0 -d6 3 0 -== OPTIMIZE W/ CLEANUP (remove d6) == -d1 5 0 -d2 1 0 -d3 1 0 -d4 1 0 -d5 1 0 -== Test of the SETTINGS clean_deleted_rows as Always == -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d6 2 1 -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -== Test of the SETTINGS clean_deleted_rows as Never == -d1 5 0 -d2 1 0 -d3 1 0 -d4 3 0 -d5 1 0 -d6 2 1 -== (Replicas) Test optimize == -d2 1 0 -d4 1 0 -== (Replicas) Test settings == -c2 1 0 -c4 1 0 -no cleanup 1 d1 5 0 -no cleanup 1 d2 1 0 -no cleanup 1 d3 1 0 -no cleanup 1 d4 3 0 -no cleanup 1 d5 1 0 -no cleanup 2 d1 5 0 -no cleanup 2 d2 1 0 -no cleanup 2 d3 1 0 -no cleanup 2 d4 3 0 -no cleanup 2 d5 1 0 -no cleanup 2 d6 2 1 -no cleanup 3 d1 5 0 -no cleanup 3 d2 1 0 -no cleanup 3 d3 1 0 -no cleanup 3 d4 3 0 -no cleanup 3 d5 1 0 -no cleanup 4 d1 5 0 -no cleanup 4 d2 1 0 -no cleanup 4 d3 1 0 -no cleanup 4 d4 3 0 -no cleanup 4 d5 1 0 -no cleanup 4 d6 2 1 -== Check cleanup & settings for other merge trees == -d1 1 1 -d1 1 1 -d1 1 1 -d1 1 1 1 -d1 1 1 1 diff --git a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql deleted file mode 100644 index 80c18ae308b..00000000000 --- a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql +++ /dev/null @@ -1,174 +0,0 @@ --- Tags: zookeeper - --- Settings allow_deprecated_syntax_for_merge_tree prevent to enable the is_deleted column -set allow_deprecated_syntax_for_merge_tree=0; - --- Test the bahaviour without the is_deleted column -DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -SELECT '== Test SELECT ... FINAL - no is_deleted =='; -select * from test FINAL order by uid; -OPTIMIZE TABLE test FINAL CLEANUP; -select * from test order by uid; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -SELECT '== Test SELECT ... FINAL - no is_deleted SETTINGS clean_deleted_rows=Always =='; -select * from test FINAL order by uid; -OPTIMIZE TABLE test FINAL CLEANUP; -select * from test order by uid; - --- Test the new behaviour -DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -SELECT '== Test SELECT ... FINAL =='; -select * from test FINAL order by uid; -select * from test order by uid; - -SELECT '== Insert backups =='; -INSERT INTO test (*) VALUES ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1); -select * from test FINAL order by uid; - -SELECT '== Insert a second batch with overlaping data =='; -INSERT INTO test (*) VALUES ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 1), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0), ('d2', 2, 1), ('d2', 3, 0), ('d3', 2, 1), ('d3', 3, 0); -select * from test FINAL order by uid; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; - --- Expect d6 to be version=3 is_deleted=false -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0); --- Insert previous version of 'd6' but only v=3 is_deleted=false will remain -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 2, 1); -SELECT '== Only last version remains after OPTIMIZE W/ CLEANUP =='; -OPTIMIZE TABLE test FINAL CLEANUP; -select * from test order by uid; - --- insert d6 v=3 is_deleted=true (timestamp more recent so this version should be the one take into acount) -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 1); - -SELECT '== OPTIMIZE W/ CLEANUP (remove d6) =='; -OPTIMIZE TABLE test FINAL CLEANUP; --- No d6 anymore -select * from test order by uid; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; - -SELECT '== Test of the SETTINGS clean_deleted_rows as Always =='; -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); --- Even if the setting is set to Always, the SELECT FINAL doesn't delete rows -select * from test FINAL order by uid; -select * from test order by uid; - -OPTIMIZE TABLE test FINAL; --- d6 has to be removed since we set clean_deleted_rows as 'Always' -select * from test where is_deleted=0 order by uid; - -SELECT '== Test of the SETTINGS clean_deleted_rows as Never =='; -ALTER TABLE test MODIFY SETTING clean_deleted_rows='Never'; -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -OPTIMIZE TABLE test FINAL; --- d6 has NOT to be removed since we set clean_deleted_rows as 'Never' -select * from test order by uid; - -DROP TABLE IF EXISTS testCleanupR1; - -CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8) - ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted) - ORDER BY uid settings allow_experimental_replacing_merge_with_cleanup=1; - - -INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0); -INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1); -INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1); -SYSTEM SYNC REPLICA testCleanupR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet" - -OPTIMIZE TABLE testCleanupR1 FINAL CLEANUP; - --- Only d3 to d5 remain -SELECT '== (Replicas) Test optimize =='; -SELECT * FROM testCleanupR1 order by uid; - ------------------------------- - -DROP TABLE IF EXISTS testSettingsR1; - -CREATE TABLE testSettingsR1 (col1 String, version UInt32, is_deleted UInt8) - ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_setting/', 'r1', version, is_deleted) - ORDER BY col1 - SETTINGS clean_deleted_rows = 'Always', allow_experimental_replacing_merge_with_cleanup=1; - -INSERT INTO testSettingsR1 (*) VALUES ('c1', 1, 1),('c2', 1, 0),('c3', 1, 1),('c4', 1, 0); -SYSTEM SYNC REPLICA testSettingsR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet" - -OPTIMIZE TABLE testSettingsR1 FINAL; - --- Only d3 to d5 remain -SELECT '== (Replicas) Test settings =='; -SELECT * FROM testSettingsR1 where is_deleted=0 order by col1; - - ------------------------------- --- Check errors -DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; - --- is_deleted == 0/1 -INSERT INTO test (*) VALUES ('d1', 1, 2); -- { serverError INCORRECT_DATA } - -DROP TABLE IF EXISTS test; --- checkis_deleted type -CREATE TABLE test (uid String, version UInt32, is_deleted String) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); -- { serverError BAD_TYPE_OF_FIELD } - -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -select 'no cleanup 1', * from test FINAL order by uid; -OPTIMIZE TABLE test FINAL CLEANUP; -- { serverError SUPPORT_IS_DISABLED } -select 'no cleanup 2', * from test order by uid; -DROP TABLE test; - -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/no_cleanup/', 'r1', version, is_deleted) Order by (uid); -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -select 'no cleanup 3', * from test FINAL order by uid; -OPTIMIZE TABLE test FINAL CLEANUP; -- { serverError SUPPORT_IS_DISABLED } -select 'no cleanup 4', * from test order by uid; -DROP TABLE test; - --- is_deleted column for other mergeTrees - ErrorCodes::LOGICAL_ERROR) - --- Check clean_deleted_rows='Always' for other MergeTrees -SELECT '== Check cleanup & settings for other merge trees =='; -CREATE TABLE testMT (uid String, version UInt32, is_deleted UInt8) ENGINE = MergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO testMT (*) VALUES ('d1', 1, 1); -OPTIMIZE TABLE testMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } -OPTIMIZE TABLE testMT FINAL; -SELECT * FROM testMT order by uid; - -CREATE TABLE testSummingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = SummingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO testSummingMT (*) VALUES ('d1', 1, 1); -OPTIMIZE TABLE testSummingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } -OPTIMIZE TABLE testSummingMT FINAL; -SELECT * FROM testSummingMT order by uid; - -CREATE TABLE testAggregatingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = AggregatingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO testAggregatingMT (*) VALUES ('d1', 1, 1); -OPTIMIZE TABLE testAggregatingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } -OPTIMIZE TABLE testAggregatingMT FINAL; -SELECT * FROM testAggregatingMT order by uid; - -CREATE TABLE testCollapsingMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = CollapsingMergeTree(sign) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO testCollapsingMT (*) VALUES ('d1', 1, 1, 1); -OPTIMIZE TABLE testCollapsingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } -OPTIMIZE TABLE testCollapsingMT FINAL; -SELECT * FROM testCollapsingMT order by uid; - -CREATE TABLE testVersionedCMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = VersionedCollapsingMergeTree(sign, version) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; -INSERT INTO testVersionedCMT (*) VALUES ('d1', 1, 1, 1); -OPTIMIZE TABLE testVersionedCMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } -OPTIMIZE TABLE testVersionedCMT FINAL; -SELECT * FROM testVersionedCMT order by uid; diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference deleted file mode 100644 index d19222b55ec..00000000000 --- a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference +++ /dev/null @@ -1,31 +0,0 @@ ---- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 ---- Verify that ReplacingMergeTree properly handles _is_deleted: ---- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. --- { echoOn } - -DROP TABLE IF EXISTS t; -CREATE TABLE t -( - `account_id` UInt64, - `_is_deleted` UInt8, - `_version` UInt64 -) -ENGINE = ReplacingMergeTree(_version, _is_deleted) -ORDER BY (account_id); -INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); --- Mark the first 100 rows as deleted. -INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); --- Put everything in one partition -OPTIMIZE TABLE t FINAL; -SELECT count() FROM t; -1000 -SELECT count() FROM t FINAL; -900 --- Both should produce the same number of rows. --- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, --- as if no rows were deleted. -SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; -900 -SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; -900 -DROP TABLE t; diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql deleted file mode 100644 index a89a1ff590a..00000000000 --- a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql +++ /dev/null @@ -1,32 +0,0 @@ ---- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 ---- Verify that ReplacingMergeTree properly handles _is_deleted: ---- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. --- { echoOn } - -DROP TABLE IF EXISTS t; -CREATE TABLE t -( - `account_id` UInt64, - `_is_deleted` UInt8, - `_version` UInt64 -) -ENGINE = ReplacingMergeTree(_version, _is_deleted) -ORDER BY (account_id); - -INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); --- Mark the first 100 rows as deleted. -INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); - --- Put everything in one partition -OPTIMIZE TABLE t FINAL; - -SELECT count() FROM t; -SELECT count() FROM t FINAL; - --- Both should produce the same number of rows. --- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, --- as if no rows were deleted. -SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; -SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; - -DROP TABLE t; diff --git a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference deleted file mode 100644 index 9c9caa22139..00000000000 --- a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference +++ /dev/null @@ -1,13 +0,0 @@ -== Only last version remains after OPTIMIZE W/ CLEANUP == -d1 5 0 -d2 1 0 -d3 1 0 -d4 1 0 -d5 1 0 -d6 3 0 -== OPTIMIZE W/ CLEANUP (remove d6) == -d1 5 0 -d2 1 0 -d3 1 0 -d4 1 0 -d5 1 0 diff --git a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql deleted file mode 100644 index 4cd44a131e3..00000000000 --- a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql +++ /dev/null @@ -1,24 +0,0 @@ -DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS vertical_merge_algorithm_min_rows_to_activate = 1, - vertical_merge_algorithm_min_columns_to_activate = 0, - min_rows_for_wide_part = 1, - min_bytes_for_wide_part = 1, - allow_experimental_replacing_merge_with_cleanup=1; - --- Expect d6 to be version=3 is_deleted=false -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0); --- Insert previous version of 'd6' but only v=3 is_deleted=false will remain -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 2, 1); -SELECT '== Only last version remains after OPTIMIZE W/ CLEANUP =='; -OPTIMIZE TABLE test FINAL CLEANUP; -select * from test order by uid; - --- insert d6 v=3 is_deleted=true (timestamp more recent so this version should be the one take into acount) -INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 1); - -SELECT '== OPTIMIZE W/ CLEANUP (remove d6) =='; -OPTIMIZE TABLE test FINAL CLEANUP; --- No d6 anymore -select * from test order by uid; - -DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql index 3c1bec4fb3f..c832e16e81e 100644 --- a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql +++ b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql @@ -17,26 +17,6 @@ CREATE TABLE t_r ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r2') ORDER BY id; -- { serverError METADATA_MISMATCH } -CREATE TABLE t2 -( - `id` UInt64, - `val` String, - `legacy_ver` UInt64, - `deleted` UInt8 -) -ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r1', legacy_ver) -ORDER BY id; - -CREATE TABLE t2_r -( - `id` UInt64, - `val` String, - `legacy_ver` UInt64, - `deleted` UInt8 -) -ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r2', legacy_ver, deleted) -ORDER BY id; -- { serverError METADATA_MISMATCH } - CREATE TABLE t3 ( `key` UInt64, diff --git a/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.reference b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql new file mode 100644 index 00000000000..002d696e62f --- /dev/null +++ b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql @@ -0,0 +1,7 @@ +# There was a wrong, harmful feature, leading to bugs and data corruption. +# This feature is removed, but we take care to maintain compatibility on the syntax level, so now it works as a no-op. + +DROP TABLE IF EXISTS t; +CREATE TABLE t (x UInt8, PRIMARY KEY x) ENGINE = ReplacingMergeTree; +OPTIMIZE TABLE t CLEANUP; +DROP TABLE t;