update zero copy locks correctly

This commit is contained in:
Alexander Tokmakov 2023-09-01 20:10:23 +02:00
parent 3eebac1cea
commit bba5a1d480
3 changed files with 49 additions and 1 deletions

View File

@ -2520,7 +2520,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
renameTempPartAndReplace(part_desc->res_part, transaction);
getCommitPartOps(ops, part_desc->res_part);
lockSharedData(*part_desc->res_part, false, part_desc->hardlinked_files);
lockSharedData(*part_desc->res_part, /* replace_existing_lock */ true, part_desc->hardlinked_files);
}
@ -9687,6 +9687,15 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode(
/// In rare case other replica can remove path between createAncestors and createIfNotExists
/// So we make up to 5 attempts
auto is_ephemeral = [&](const String & node_path, bool default_res) -> bool
{
String dummy_res;
Coordination::Stat node_stat;
if (zookeeper->tryGet(node_path, dummy_res, &node_stat))
return node_stat.ephemeralOwner;
return default_res;
};
bool created = false;
for (int attempts = 5; attempts > 0; --attempts)
{
@ -9706,6 +9715,9 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode(
if (error == Coordination::Error::ZNODEEXISTS)
{
if (is_ephemeral(zookeeper_node, /* default_res */ false))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node {} is ephemeral", zookeeper_node);
size_t failed_op = zkutil::getFailedOpIndex(error, responses);
/// Part was locked before, unfortunately it's possible during moves
if (ops[failed_op]->getPath() == zookeeper_node)
@ -9725,6 +9737,9 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode(
size_t failed_op = zkutil::getFailedOpIndex(error, responses);
if (ops[failed_op]->getPath() == zookeeper_node)
{
if (!is_ephemeral(zookeeper_node, /* default_res */ true))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node {} is not ephemeral", zookeeper_node);
LOG_WARNING(&Poco::Logger::get("ZeroCopyLocks"), "Replacing persistent lock with ephemeral for path {}. It can happen only in case of local part loss", zookeeper_node);
replace_existing_lock = true;
continue;

View File

@ -0,0 +1,4 @@
1
1
6 0
12 0

View File

@ -0,0 +1,29 @@
-- Tags: no-replicated-database
-- Tag no-replicated-database: different number of replicas
create table src1 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/src', '1') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1;
create table src2 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/src', '2') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1;
create table dst1 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/dst', '1') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1;
create table dst2 (n int) engine=ReplicatedMergeTree('/test/02413/{database}/dst', '2') order by tuple() settings storage_policy='s3', allow_remote_fs_zero_copy_replication=1;
insert into src1 values(1);
insert into src2 values(2);
system sync replica src1 lightweight;
alter table dst1 replace partition id 'all' from src1;
system sync replica dst2;
select count() != 0 from dst1;
select count() != 0 from dst2;
-- ensure that locks exist and they are not ephemeral
set allow_unrestricted_reads_from_keeper=1;
select count(), sum(ephemeralOwner) from system.zookeeper where path like '/clickhouse/zero_copy/zero_copy_s3/' ||
(select value from system.zookeeper where path='/test/02413/'||currentDatabase()||'/dst' and name='table_shared_id') || '/%';
-- check the same for move partition
alter table dst2 move partition id 'all' to table src2;
system sync replica src1;
select count(), sum(ephemeralOwner) from system.zookeeper where path like '/clickhouse/zero_copy/zero_copy_s3/' ||
(select value from system.zookeeper where path='/test/02413/'||currentDatabase()||'/src' and name='table_shared_id') || '/%';