Assign UUIDs to parts only when configured to do so

Avoid breaking backwards compatibility by default for now.
This commit is contained in:
Nicolae Vartolomei 2020-11-02 14:38:18 +00:00
parent b6a330de77
commit 94293ca3ce
15 changed files with 110 additions and 57 deletions

View File

@ -325,7 +325,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart(
if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)
readStringBinary(part_type, in);
UUID part_uuid;
UUID part_uuid = UUIDHelpers::Nil;
if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID)
readUUIDText(part_uuid, in);

View File

@ -167,7 +167,7 @@ public:
/// Part unique identifier.
/// The intention is to use it for identifying cases where the same part is
/// processed by multiple shards.
UUID uuid;
UUID uuid = UUIDHelpers::Nil;
VolumePtr volume;

View File

@ -1821,10 +1821,8 @@ void MergeTreeDataMergerMutator::finalizeMutatedPart(
{
auto disk = new_data_part->volume->getDisk();
if (new_data_part->uuid != UUIDHelpers::Nil)
{
if (new_data_part->uuid == UUIDHelpers::Nil)
throw Exception("Empty IMergeTreeDataPart#uuid in finalize for part: " + new_data_part->name, ErrorCodes::LOGICAL_ERROR);
auto out = disk->writeFile(new_data_part->getFullRelativePath() + IMergeTreeDataPart::UUID_FILE_NAME, 4096);
writeUUIDText(new_data_part->uuid, *out);
}

View File

@ -19,7 +19,7 @@ class MergeProgressCallback;
struct FutureMergedMutatedPart
{
String name;
UUID uuid;
UUID uuid = UUIDHelpers::Nil;
String path;
MergeTreeDataPartType type;
MergeTreePartInfo part_info;

View File

@ -107,6 +107,7 @@ struct Settings;
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
\
/** Settings for testing purposes */ \
M(Bool, randomize_part_type, false, "For testing purposes only. Randomizes part type between wide and compact", 0) \

View File

@ -147,17 +147,6 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor
auto part_disk = storage.reserveSpace(0)->getDisk();
auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + part_name, disk, 0);
/// Likely part written by older ClickHouse version which didn't support UUIDs.
if (metadata.part_uuid == UUIDHelpers::Nil)
{
/// Defensive check. Since WAL version 1 we expect all parts to have UUID.
if (version > 0)
throw Exception("Unexpected empty part_uuid in entry version: " + toString(version), ErrorCodes::CORRUPTED_DATA);
metadata.part_uuid = UUIDHelpers::generateV4();
}
/// TODO(nv) Create part should check for empty UUIDs and crash.
part = storage.createPart(
part_name,
MergeTreeDataPartType::IN_MEMORY,

View File

@ -133,10 +133,8 @@ void MergedBlockOutputStream::finalizePartOnDisk(
MergeTreeData::DataPart::Checksums & checksums,
bool sync)
{
if (new_part->uuid != UUIDHelpers::Nil)
{
if (new_part->uuid == UUIDHelpers::Nil)
throw Exception("Empty IMergeTreeDataPart#uuid in finalize for part: " + new_part->name, ErrorCodes::LOGICAL_ERROR);
auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::UUID_FILE_NAME, 4096);
writeUUIDText(new_part->uuid, *out);
out->finalize();

View File

@ -19,7 +19,14 @@ namespace ErrorCodes
void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
{
out << "format version: 4\n"
/// Baseline on-wire format version for entries that carry no part UUID.
UInt8 format_version = 4;
/// Raise format_version to at least 5 only when a UUID has been assigned, because the entry
/// will then contain the extra uuid field written further below in this function.
/// std::max (not std::min) is required here: min(4, 5) would leave the version at 4 and the bump
/// would never happen. If some other feature already requires format_version >= 5, this is a no-op.
if (new_part_uuid != UUIDHelpers::Nil)
    format_version = std::max(format_version, static_cast<UInt8>(5));
out << "format version: " << format_version << "\n"
<< "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n"
<< "source replica: " << source_replica << '\n'
<< "block_id: " << escape << block_id << '\n';
@ -31,8 +38,6 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
break;
case MERGE_PARTS:
assert(new_part_uuid != UUIDHelpers::Nil);
out << "merge\n";
for (const String & s : source_parts)
out << s << '\n';
@ -42,7 +47,9 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
if (merge_type != MergeType::REGULAR)
out <<"\nmerge_type: " << static_cast<UInt64>(merge_type);
out << "\ninto_uuid: " << new_part_uuid;
if (new_part_uuid != UUIDHelpers::Nil)
out << "\ninto_uuid: " << new_part_uuid;
break;
case DROP_RANGE:
@ -75,14 +82,14 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
break;
case MUTATE_PART:
assert(new_part_uuid != UUIDHelpers::Nil);
out << "mutate\n"
<< source_parts.at(0) << "\n"
<< "to\n"
<< new_part_name
<< "\nto_uuid\n"
<< new_part_uuid;
<< new_part_name;
if (new_part_uuid != UUIDHelpers::Nil)
out << "\nto_uuid\n"
<< new_part_uuid;
if (isAlterMutation())
out << "\nalter_version\n" << alter_version;
@ -122,7 +129,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
in >> "format version: " >> format_version >> "\n";
if (format_version < 1 || format_version > 4)
if (format_version < 1 || format_version > 5)
throw Exception("Unknown ReplicatedMergeTreeLogEntry format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT_VERSION);
if (format_version >= 2)

View File

@ -77,7 +77,7 @@ struct ReplicatedMergeTreeLogEntryData
MergeTreeDataPartType new_part_type;
String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks/).
mutable String actual_new_part_name; /// GET_PART could actually fetch a part covering 'new_part_name'.
UUID new_part_uuid;
UUID new_part_uuid = UUIDHelpers::Nil;
Strings source_parts;
bool deduplicate = false; /// Do deduplicate on merge

View File

@ -641,7 +641,9 @@ std::shared_ptr<StorageMergeTree::MergeMutateSelectedEntry> StorageMergeTree::se
auto data_settings = getSettings();
FutureMergedMutatedPart future_part;
future_part.uuid = UUIDHelpers::generateV4();
if (storage_settings.get()->assign_part_uuids)
future_part.uuid = UUIDHelpers::generateV4();
/// You must call destructor with unlocked `currently_processing_in_background_mutex`.
CurrentlyMergingPartsTaggerPtr merging_tagger;
@ -797,6 +799,9 @@ std::shared_ptr<StorageMergeTree::MergeMutateSelectedEntry> StorageMergeTree::se
size_t max_ast_elements = global_context.getSettingsRef().max_expanded_ast_elements;
FutureMergedMutatedPart future_part;
if (storage_settings.get()->assign_part_uuids)
future_part.uuid = UUIDHelpers::generateV4();
MutationCommands commands;
CurrentlyMergingPartsTaggerPtr tagger;
@ -863,8 +868,7 @@ std::shared_ptr<StorageMergeTree::MergeMutateSelectedEntry> StorageMergeTree::se
future_part.parts.push_back(part);
future_part.part_info = new_part_info;
future_part.name = part->getNewName(new_part_info);
future_part.uuid = UUIDHelpers::generateV4();
future_part.type = part->getType();
future_part.type = part->getType();
tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true);
return std::make_shared<MergeMutateSelectedEntry>(future_part, std::move(tagger), commands);

View File

@ -2707,11 +2707,22 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
getTotalMergesWithTTLInMergeList() < storage_settings_ptr->max_number_of_merges_with_ttl_in_pool;
FutureMergedMutatedPart future_merged_part;
if (storage_settings.get()->assign_part_uuids)
future_merged_part.uuid = UUIDHelpers::generateV4();
if (max_source_parts_size_for_merge > 0 &&
merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, merge_with_ttl_allowed, nullptr))
{
create_result = createLogEntryToMergeParts(zookeeper, future_merged_part.parts,
future_merged_part.name, future_merged_part.type, deduplicate, nullptr, merge_pred.getVersion(), future_merged_part.merge_type);
create_result = createLogEntryToMergeParts(
zookeeper,
future_merged_part.parts,
future_merged_part.name,
future_merged_part.uuid,
future_merged_part.type,
deduplicate,
nullptr,
merge_pred.getVersion(),
future_merged_part.merge_type);
}
/// If there are many mutations in queue, it may happen, that we cannot enqueue enough merges to merge all new parts
else if (max_source_part_size_for_mutation > 0 && queue.countMutations() > 0
@ -2728,8 +2739,12 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
if (!desired_mutation_version)
continue;
create_result = createLogEntryToMutatePart(*part,
desired_mutation_version->first, desired_mutation_version->second, merge_pred.getVersion());
create_result = createLogEntryToMutatePart(
*part,
future_merged_part.uuid,
desired_mutation_version->first,
desired_mutation_version->second,
merge_pred.getVersion());
if (create_result == CreateMergeEntryResult::Ok)
break;
@ -2791,6 +2806,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
zkutil::ZooKeeperPtr & zookeeper,
const DataPartsVector & parts,
const String & merged_name,
const UUID & merged_part_uuid,
const MergeTreeDataPartType & merged_part_type,
bool deduplicate,
ReplicatedMergeTreeLogEntryData * out_log_entry,
@ -2826,7 +2842,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
entry.type = LogEntry::MERGE_PARTS;
entry.source_replica = replica_name;
entry.new_part_name = merged_name;
entry.new_part_uuid = UUIDHelpers::generateV4();
entry.new_part_uuid = merged_part_uuid;
entry.new_part_type = merged_part_type;
entry.merge_type = merge_type;
entry.deduplicate = deduplicate;
@ -2875,7 +2891,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::createLogEntryToMutatePart(
const IMergeTreeDataPart & part, Int64 mutation_version, int32_t alter_version, int32_t log_version)
const IMergeTreeDataPart & part, const UUID & new_part_uuid, Int64 mutation_version, int32_t alter_version, int32_t log_version)
{
auto zookeeper = getZooKeeper();
@ -2902,7 +2918,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
entry.source_replica = replica_name;
entry.source_parts.push_back(part.name);
entry.new_part_name = new_part_name;
entry.new_part_uuid = UUIDHelpers::generateV4();
entry.new_part_uuid = new_part_uuid;
entry.create_time = time(nullptr);
entry.alter_version = alter_version;
@ -3846,6 +3862,9 @@ bool StorageReplicatedMergeTree::optimize(
ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper);
FutureMergedMutatedPart future_merged_part;
if (storage_settings.get()->assign_part_uuids)
future_merged_part.uuid = UUIDHelpers::generateV4();
bool selected = merger_mutator.selectAllPartsToMergeWithinPartition(
future_merged_part, disk_space, can_merge, partition_id, true, nullptr);
@ -3855,7 +3874,7 @@ bool StorageReplicatedMergeTree::optimize(
ReplicatedMergeTreeLogEntryData merge_entry;
CreateMergeEntryResult create_result = createLogEntryToMergeParts(
zookeeper, future_merged_part.parts,
future_merged_part.name, future_merged_part.type, deduplicate,
future_merged_part.name, future_merged_part.uuid, future_merged_part.type, deduplicate,
&merge_entry, can_merge.getVersion(), future_merged_part.merge_type);
if (create_result == CreateMergeEntryResult::MissingPart)
@ -3881,6 +3900,9 @@ bool StorageReplicatedMergeTree::optimize(
ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper);
FutureMergedMutatedPart future_merged_part;
if (storage_settings.get()->assign_part_uuids)
future_merged_part.uuid = UUIDHelpers::generateV4();
String disable_reason;
bool selected = false;
if (!partition)
@ -3909,7 +3931,7 @@ bool StorageReplicatedMergeTree::optimize(
ReplicatedMergeTreeLogEntryData merge_entry;
CreateMergeEntryResult create_result = createLogEntryToMergeParts(
zookeeper, future_merged_part.parts,
future_merged_part.name, future_merged_part.type, deduplicate,
future_merged_part.name, future_merged_part.uuid, future_merged_part.type, deduplicate,
&merge_entry, can_merge.getVersion(), future_merged_part.merge_type);
if (create_result == CreateMergeEntryResult::MissingPart)

View File

@ -464,6 +464,7 @@ private:
zkutil::ZooKeeperPtr & zookeeper,
const DataPartsVector & parts,
const String & merged_name,
const UUID & merged_part_uuid,
const MergeTreeDataPartType & merged_part_type,
bool deduplicate,
ReplicatedMergeTreeLogEntryData * out_log_entry,
@ -472,6 +473,7 @@ private:
CreateMergeEntryResult createLogEntryToMutatePart(
const IMergeTreeDataPart & part,
const UUID & new_part_uuid,
Int64 mutation_version,
int32_t alter_version,
int32_t log_version);

View File

@ -0,0 +1,6 @@
<yandex>
<merge_tree>
<!-- Generate UUIDs for parts (the setting defaults to false). Before enabling, check that all replicas support the new format. -->
<assign_part_uuids>1</assign_part_uuids>
<!-- For testing purposes only: randomizes part type between wide and compact. -->
<randomize_part_type>1</randomize_part_type>
</merge_tree>
</yandex>

View File

@ -15,4 +15,4 @@
</shard>
</test_cluster>
</remote_servers>
</yandex>
</yandex>

View File

@ -1,4 +1,3 @@
import time
import uuid
import pytest
@ -9,12 +8,12 @@ cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
'node1',
main_configs=['configs/remote_servers.xml'],
main_configs=['configs/remote_servers.xml', 'configs/merge_tree.xml'],
with_zookeeper=True)
node2 = cluster.add_instance(
'node2',
main_configs=['configs/remote_servers.xml'],
main_configs=['configs/remote_servers.xml', 'configs/merge_tree.xml'],
with_zookeeper=True)
@ -28,7 +27,7 @@ def started_cluster():
def test_part_uuid(started_cluster):
test_uuid = str(uuid.uuid4())
uuid_zero = uuid.UUID(bytes=b"\x00" * 16)
for ix, n in enumerate([node1, node2]):
n.query("""
@ -37,17 +36,44 @@ def test_part_uuid(started_cluster):
ORDER BY tuple()
""".format(ix))
# Test insert assigns uuid to part.
node1.query("INSERT INTO t VALUES (1, 1)")
node1.query("ALTER TABLE t DETACH PARTITION tuple()")
node1.exec_in_container([
"bash", "-c",
"echo '{}' > /var/lib/clickhouse/data/default/t/detached/{}/uuid.txt".format(test_uuid, "all_0_0_0")
])
node1.query("ALTER TABLE t ATTACH PARTITION tuple()")
node1.query("ALTER TABLE t UPDATE value = 1 WHERE key = 1")
uuids = set()
for node in [node1, node2]:
node.query("SYSTEM SYNC REPLICA t")
part_initial_uuid = uuid.UUID(node.query("SELECT uuid FROM system.parts WHERE table = 't' AND active ORDER BY name").strip())
uuids.add(part_initial_uuid)
assert uuid_zero != part_initial_uuid
assert len(uuids) == 1, "expect the same uuid on all the replicas"
assert test_uuid == node1.query("SELECT uuid FROM system.parts WHERE table = 't' AND active ORDER BY name").strip()
# Test detach / attach.
node1.query("ALTER TABLE t DETACH PARTITION tuple(); ALTER TABLE t ATTACH PARTITION tuple()")
for node in [node1, node2]:
node.query("SYSTEM SYNC REPLICA t")
part_reattach_uuid = uuid.UUID(node.query(
"SELECT uuid FROM system.parts WHERE table = 't' AND active ORDER BY name").strip())
assert part_initial_uuid == part_reattach_uuid
# Test mutation assigns new non-zero uuids.
node1.query("ALTER TABLE t UPDATE value = 1 WHERE key = 1 SETTINGS mutations_sync = 2")
part_mutate_uuid = uuid.UUID(node1.query("SELECT uuid FROM system.parts WHERE table = 't' AND active ORDER BY name").strip())
assert part_mutate_uuid not in [uuid_zero, part_initial_uuid]
node2.query("SYSTEM SYNC REPLICA t")
assert test_uuid == node2.query("SELECT uuid FROM system.parts WHERE table = 't' AND active ORDER BY name").strip()
assert part_mutate_uuid == uuid.UUID(node2.query(
"SELECT uuid FROM system.parts WHERE table = 't' AND active ORDER BY name").strip())
# Test merge assigns new non-zero uuids.
node2.query("INSERT INTO t VALUES (1, 1)")
node2.query("OPTIMIZE TABLE t FINAL")
uuids = set()
for node in [node1, node2]:
node.query("SYSTEM SYNC REPLICA t")
part_merge_uuid = uuid.UUID(node.query(
"SELECT uuid FROM system.parts WHERE table = 't' AND active ORDER BY name").strip())
uuids.add(part_merge_uuid)
assert part_mutate_uuid not in [uuid_zero, part_merge_uuid]
assert len(uuids) == 1, "expect the same uuid on all the replicas"