diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cabdf67a315..8825bdac4e3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2520,7 +2520,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) renameTempPartAndReplace(part_desc->res_part, transaction); getCommitPartOps(ops, part_desc->res_part); - lockSharedData(*part_desc->res_part, false, part_desc->hardlinked_files); + lockSharedData(*part_desc->res_part, /* replace_existing_lock */ true, part_desc->hardlinked_files); } @@ -9687,6 +9687,15 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( /// In rare case other replica can remove path between createAncestors and createIfNotExists /// So we make up to 5 attempts + auto is_ephemeral = [&](const String & node_path, bool default_res) -> bool + { + String dummy_res; + Coordination::Stat node_stat; + if (zookeeper->tryGet(node_path, dummy_res, &node_stat)) + return node_stat.ephemeralOwner; + return default_res; + }; + bool created = false; for (int attempts = 5; attempts > 0; --attempts) { @@ -9706,6 +9715,9 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( if (error == Coordination::Error::ZNODEEXISTS) { + if (is_ephemeral(zookeeper_node, /* default_res */ false)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Node {} is ephemeral", zookeeper_node); + size_t failed_op = zkutil::getFailedOpIndex(error, responses); /// Part was locked before, unfortunately it's possible during moves if (ops[failed_op]->getPath() == zookeeper_node) @@ -9725,6 +9737,9 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( size_t failed_op = zkutil::getFailedOpIndex(error, responses); if (ops[failed_op]->getPath() == zookeeper_node) { + if (!is_ephemeral(zookeeper_node, /* default_res */ true)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Node {} is not ephemeral", zookeeper_node); + LOG_WARNING(&Poco::Logger::get("ZeroCopyLocks"), "Replacing persistent lock with ephemeral for path {}. It can happen only in case of local part loss", zookeeper_node); replace_existing_lock = true; continue; diff --git a/tests/queries/0_stateless/02413_replace_partition_zero_copy.reference b/tests/queries/0_stateless/02413_replace_partition_zero_copy.reference new file mode 100644 index 00000000000..d755a4551f1 --- /dev/null +++ b/tests/queries/0_stateless/02413_replace_partition_zero_copy.reference @@ -0,0 +1,4 @@ +1 +1 +6 0 +12 0 diff --git a/tests/queries/0_stateless/02413_replace_partition_zero_copy.sql b/tests/queries/0_stateless/02413_replace_partition_zero_copy.sql new file mode 100644 index 00000000000..e07ae9c7dcd --- /dev/null +++ b/tests/queries/0_stateless/02413_replace_partition_zero_copy.sql @@ -0,0 +1,29 @@ +-- Tags: no-replicated-database +-- Tag no-replicated-database: different number of replicas + +create table src1 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/src', '1') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1; +create table src2 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/src', '2') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1; +create table dst1 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/dst', '1') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1; +create table dst2 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/dst', '2') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1; + +insert into src1 values(1); +insert into src2 values(2); +system sync replica src1 lightweight; + +alter table dst1 replace partition id 'all' from src1; +system sync replica dst2; + +select count() != 0 from dst1; +select count() != 0 from dst2; + +-- ensure that locks exist and they are not ephemeral +set allow_unrestricted_reads_from_keeper=1; +select count(), sum(ephemeralOwner) from system.zookeeper where path like '/clickhouse/zero_copy/zero_copy_s3/' || + (select value from system.zookeeper where path='/test/02413/'||currentDatabase()||'/dst' and name='table_shared_id') || '/%'; + +-- check the same for move partition +alter table dst2 move partition id 'all' to table src2; +system sync replica src1; + +select count(), sum(ephemeralOwner) from system.zookeeper where path like '/clickhouse/zero_copy/zero_copy_s3/' || + (select value from system.zookeeper where path='/test/02413/'||currentDatabase()||'/src' and name='table_shared_id') || '/%';