ClickHouse/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
youennL-cs 6526c2a8ab
[RFC] Replacing merge tree new engine (#41005)
* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Replace sign column(Int8) by is_deleted(UInt8)

* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Replace sign column(Int8) by is_deleted(UInt8)

* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Replace sign column(Int8) by is_deleted(UInt8)

* Add keyword 'CLEANUP' when OPTIMIZE

* Cleanup uniquely when it's a replacingMergeTree

* Propagate CLEANUP information and change from 'with_cleanup' to 'cleanup'

* Cleanup data flagged as 'is_deleted'

* Fix merge when optimize and add a test

* Fix OPTIMIZE and INSERT + add tests

* New fix for cleanup at the merge

* Cleanup debug logs

* Add the SETTINGS option 'clean_deleted_rows' that can be 'never' or 'always'

* Fix regression bug; now ReplicatedMergeTree can be called as before without 'is_deleted'

* Add Replicated tests

* Disable tag 'long' for our test and cleanup some white spaces

* Update tests

* Fix tests and remove additional useless whitespace

* Fix replica test

* Style clean && add condition check for is_deleted values

* clean_deleted_rows setting is now an enum

* Add valid default value to the clean_deleted_rows settings

* Update cleanup checkers to use the enum and fix typos in the test

* Fix submodule contrib/AMQP-CPP pointer

* Add missing messages in test reference and remove a print with non-deterministic order

* fix replica test reference

* Fix edge case

* Fix a typo for the spell checker

* Fix reference

* Fix a condition to raise an error if is_deleted differ from 0/1 and cleanup

* Change tests file name and update number

* This should fix the ReplacingMergeTree parameter set

* Fix replicated parameters

* Disable allow_deprecated_syntax_for_merge_tree for our new column

* Fix a test

* Remove non deterministic order print in the test

* Test on replicas

* Remove a condition, when checking optional parameters, that should not be useful since we disabled the deprecated syntax

* Revert "Remove a condition, when checking optional parameters, that should not be useful since we disabled the deprected_syntaxe"

This reverts commit b65d64c05e.

* Fix replica management and limit the number of arguments to two maximum, due to the possibility of deprecated table create/attach failing otherwise

* Test a fix for replicated log information error

* Try to add sync to have consistent results

* Change path of replicas that should cause one issue and add few prints in case it's not that

* Get cleanup info on replicas only if information found

* Fix style issues

* Try to avoid replication error 'cannot select parts...' and replica read/write field order

* Cleanup according to PR reviews
 and add tests on error raised.

* Update src/Storages/MergeTree/registerStorageMergeTree.cpp

Co-authored-by: Alexander Tokmakov <tavplubix@gmail.com>

* SELECT ... FINAL doesn't show rows with is_deleted = true

* Update and fix SELECT ... FINAL merge parameter

* Remove is_deleted rows only on the version inserted when merge

* Fix (master) updates issues

* Revert changes that should not be committed

* Add changes according to review

* Revert changes that should not be committed - part 2

---------

Co-authored-by: Alexander Tokmakov <tavplubix@gmail.com>
2023-02-16 16:03:16 +03:00

196 lines
8.5 KiB
C++

#pragma once
#include <Common/Exception.h>
#include <Common/ZooKeeper/Types.h>
#include <base/types.h>
#include <IO/WriteHelpers.h>
#include <Storages/MergeTree/MergeTreeDataPartType.h>
#include <Storages/MergeTree/MergeType.h>
#include <Storages/MergeTree/MergeTreeDataFormatVersion.h>
#include <Disks/IDisk.h>
#include <mutex>
#include <condition_variable>
namespace DB
{
/// Forward declarations. In this header these types are only named in
/// reference parameters (ReadBuffer, WriteBuffer, MergeTreePartInfo) or in
/// comments (ReplicatedMergeTreeQueue), so their definitions are not needed here.
class ReadBuffer;
class WriteBuffer;
class ReplicatedMergeTreeQueue;
struct MergeTreePartInfo;
/// Error codes referenced by this header. The constant is only declared here
/// (extern); typeToString() throws it for a type that has no explicit case.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Record about what needs to be done. Only data (you can copy them).
///
/// Besides the entry type and the part names it refers to, the record carries
/// type-specific payload (merge options, REPLACE_RANGE details, ALTER metadata)
/// and several informational fields describing the execution attempts.
struct ReplicatedMergeTreeLogEntryData
{
    enum Type
    {
        EMPTY,          /// Not used.
        GET_PART,       /// Get the part from another replica.
        ATTACH_PART,    /// Attach the part, possibly from our own replica (if found in /detached folder).
                        /// You may think of it as a GET_PART with some optimisations as they're nearly identical.
        MERGE_PARTS,    /// Merge the parts.
        DROP_RANGE,     /// Delete the parts in the specified partition in the specified number range.
        CLEAR_COLUMN,   /// NOTE: Deprecated. Drop specific column from specified partition.
        CLEAR_INDEX,    /// NOTE: Deprecated. Drop specific index from specified partition.
        REPLACE_RANGE,  /// Drop certain range of partitions and replace them by new ones
        MUTATE_PART,    /// Apply one or several mutations to the part.
        ALTER_METADATA, /// Apply alter modification according to global /metadata and /columns paths
        SYNC_PINNED_PART_UUIDS, /// Synchronization point for ensuring that all replicas have up to date in-memory state.
        CLONE_PART_FROM_SHARD,  /// Clone part from another shard.
        DROP_PART,      /// NOTE: Virtual (has the same (de)serialization format as DROP_RANGE). Deletes the specified part.
    };

    /// Returns the textual name of the given entry type.
    /// Throws LOGICAL_ERROR for any value that has no explicit case below;
    /// note that EMPTY is one of them.
    static String typeToString(Type type)
    {
        switch (type)
        {
            case ReplicatedMergeTreeLogEntryData::GET_PART:         return "GET_PART";
            case ReplicatedMergeTreeLogEntryData::ATTACH_PART:      return "ATTACH_PART";
            case ReplicatedMergeTreeLogEntryData::MERGE_PARTS:      return "MERGE_PARTS";
            case ReplicatedMergeTreeLogEntryData::DROP_RANGE:       return "DROP_RANGE";
            case ReplicatedMergeTreeLogEntryData::CLEAR_COLUMN:     return "CLEAR_COLUMN";
            case ReplicatedMergeTreeLogEntryData::CLEAR_INDEX:      return "CLEAR_INDEX";
            case ReplicatedMergeTreeLogEntryData::REPLACE_RANGE:    return "REPLACE_RANGE";
            case ReplicatedMergeTreeLogEntryData::MUTATE_PART:      return "MUTATE_PART";
            case ReplicatedMergeTreeLogEntryData::ALTER_METADATA:   return "ALTER_METADATA";
            case ReplicatedMergeTreeLogEntryData::SYNC_PINNED_PART_UUIDS: return "SYNC_PINNED_PART_UUIDS";
            case ReplicatedMergeTreeLogEntryData::CLONE_PART_FROM_SHARD:  return "CLONE_PART_FROM_SHARD";
            case ReplicatedMergeTreeLogEntryData::DROP_PART:        return "DROP_PART";
            default:
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown log entry type: {}", DB::toString<int>(type));
        }
    }

    /// Textual name of this entry's own type; same rules as the static overload.
    String typeToString() const
    {
        return typeToString(type);
    }

    /// Text (de)serialization of the entry; implemented out of line.
    void writeText(WriteBuffer & out) const;
    void readText(ReadBuffer & in, MergeTreeDataFormatVersion partition_format_version);
    String toString() const;

    String znode_name;
    String log_entry_id;

    Type type = EMPTY;
    String source_replica; /// Empty string means that this entry was added to the queue immediately, and not copied from the log.
    String source_shard;

    String part_checksum;  /// Part checksum for ATTACH_PART, empty otherwise.

    /// The name of resulting part for GET_PART and MERGE_PARTS
    /// Part range for DROP_RANGE and CLEAR_COLUMN
    String new_part_name;
    MergeTreeDataPartFormat new_part_format;
    String block_id;                        /// For parts of level zero, the block identifier for deduplication (node name in /blocks/).
    mutable String actual_new_part_name;    /// GET_PART could actually fetch a part covering 'new_part_name'.
    UUID new_part_uuid = UUIDHelpers::Nil;

    Strings source_parts;
    bool deduplicate = false; /// Do deduplicate on merge
    Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default).
    /// NOTE(review): presumably set when the merge was requested with CLEANUP
    /// (OPTIMIZE ... CLEANUP) - confirm against the code that fills this field.
    bool cleanup = false;
    MergeType merge_type = MergeType::Regular;
    String column_name;
    String index_name;

    /// For DROP_RANGE, true means that the parts need not be deleted, but moved to the `detached` directory.
    bool detach = false;

    /// REPLACE PARTITION FROM command
    struct ReplaceRangeEntry
    {
        String drop_range_part_name;

        String from_database;
        String from_table;
        Strings src_part_names; // as in from_table
        Strings new_part_names;
        Strings part_names_checksums;
        /// Explicitly initialised so that a default-constructed entry never holds
        /// an indeterminate value (reading one is undefined behaviour).
        /// NOTE(review): -1 mirrors the "not set" convention of alter_version
        /// in the enclosing struct - confirm it is the intended sentinel.
        int columns_version = -1;

        /// Text (de)serialization of the REPLACE_RANGE payload; implemented out of line.
        void writeText(WriteBuffer & out) const;
        void readText(ReadBuffer & in);

        static bool isMovePartitionOrAttachFrom(const MergeTreePartInfo & drop_range_info);
    };

    std::shared_ptr<ReplaceRangeEntry> replace_range_entry;

    /// ALTER METADATA and MUTATE PART command

    /// Version of metadata which will be set after this alter
    /// Also present in MUTATE_PART command, to track mutations
    /// required for complete alter execution.
    int alter_version = -1; /// May be equal to -1, if it's normal mutation, not metadata update.

    /// only ALTER METADATA command
    /// NOTE It's never used
    bool have_mutation = false; /// If this alter requires additional mutation step, for data update

    String columns_str;  /// New columns data corresponding to alter_version
    String metadata_str; /// New metadata corresponding to alter_version

    /// Returns a set of parts that will appear after executing the entry + parts to block
    /// selection of merges. These parts are added to queue.virtual_parts.
    Strings getVirtualPartNames(MergeTreeDataFormatVersion format_version) const;

    /// Returns fake part for drop range (for DROP_RANGE and REPLACE_RANGE)
    std::optional<String> getDropRange(MergeTreeDataFormatVersion format_version) const;

    String getDescriptionForLogs(MergeTreeDataFormatVersion format_version) const;

    /// This entry is DROP PART, not DROP PARTITION. They both have same
    /// DROP_RANGE entry type, but differs in information about drop range.
    bool isDropPart(MergeTreeDataFormatVersion format_version) const;

    /// Access under queue_mutex, see ReplicatedMergeTreeQueue.
    bool currently_executing = false;    /// Whether the action is executing now.
    bool removed_by_other_entry = false;

    /// These several fields are informational only (for viewing by the user using system tables).
    /// Access under queue_mutex, see ReplicatedMergeTreeQueue.
    size_t num_tries = 0;                /// The number of attempts to perform the action (since the server started, including the running one).
    std::exception_ptr exception;        /// The last exception, in the case of an unsuccessful attempt to perform the action.
    time_t last_exception_time = 0;      /// The time at which the last exception occurred.
    time_t last_attempt_time = 0;        /// The time at which the last attempt was attempted to complete the action.
    size_t num_postponed = 0;            /// The number of times the action was postponed.
    String postpone_reason;              /// The reason why the action was postponed, if it was postponed.
    time_t last_postpone_time = 0;       /// The time of the last time the action was postponed.

    /// Creation time or the time to copy from the general log to the queue of a particular replica.
    time_t create_time = 0;

    /// The quorum value (for GET_PART) is a non-zero value when the quorum write is enabled.
    size_t quorum = 0;

    /// If this MUTATE_PART entry caused by alter(modify/drop) query.
    /// True only for MUTATE_PART entries whose alter_version is set (i.e. not -1).
    bool isAlterMutation() const
    {
        return type == MUTATE_PART && alter_version != -1;
    }
};
/// A log entry meant to be held through std::shared_ptr: it adds a condition
/// variable to the plain data inherited from ReplicatedMergeTreeLogEntryData.
struct ReplicatedMergeTreeLogEntry
    : public ReplicatedMergeTreeLogEntryData
    , public std::enable_shared_from_this<ReplicatedMergeTreeLogEntry>
{
    using Ptr = std::shared_ptr<ReplicatedMergeTreeLogEntry>;

    /// Awake when currently_executing becomes false.
    std::condition_variable execution_complete;

    /// Implemented out of line.
    /// NOTE(review): presumably builds an entry from its serialized text `s`
    /// and the node metadata `stat` - confirm in the implementation file.
    static Ptr parse(
        const String & s,
        const Coordination::Stat & stat,
        MergeTreeDataFormatVersion format_version);
};
using ReplicatedMergeTreeLogEntryPtr = std::shared_ptr<ReplicatedMergeTreeLogEntry>;
}