Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER.

This commit is contained in:
Vitaly Baranov 2023-12-12 00:19:16 +01:00
parent 30148972ed
commit 16afd81322
3 changed files with 43 additions and 72 deletions

View File

@ -184,11 +184,9 @@ BackupCoordinationRemote::BackupCoordinationRemote(
if (my_is_internal)
{
String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
zk->createAncestors(alive_node_path);
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code == Coordination::Error::ZNODEEXISTS)
zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
else if (code != Coordination::Error::ZOK)
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, alive_node_path);
}
})

View File

@ -60,12 +60,6 @@ void BackupCoordinationStageSync::set(const String & current_host, const String
}
else
{
/// Make an ephemeral node so the initiator can track if the current host is still working.
String alive_node_path = zookeeper_path + "/alive|" + current_host;
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
throw zkutil::KeeperException::fromPath(code, alive_node_path);
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
}
@ -118,27 +112,24 @@ struct BackupCoordinationStageSync::State
Strings results;
std::map<String, UnreadyHostState> unready_hosts;
std::optional<std::pair<String, Exception>> error;
std::optional<String> host_terminated;
std::optional<String> disconnected_host;
};
BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
WithRetries::RetriesControlHolder & retries_control_holder,
const Strings & zk_nodes,
const Strings & all_hosts,
const String & stage_to_wait) const
{
auto zookeeper = retries_control_holder.faulty_zookeeper;
auto & retries_ctl = retries_control_holder.retries_ctl;
std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};
State state;
if (zk_nodes_set.contains("error"))
{
String errors;
{
auto holder = with_retries.createRetriesControlHolder("readCurrentState");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
errors = zookeeper->get(zookeeper_path + "/error");
});
}
String errors = zookeeper->get(zookeeper_path + "/error");
ReadBufferFromOwnString buf{errors};
String host;
readStringBinary(host, buf);
@ -150,60 +141,40 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
{
if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
{
UnreadyHostState unready_host_state;
const String started_node_name = "started|" + host;
const String alive_node_name = "alive|" + host;
const String alive_node_path = zookeeper_path + "/" + alive_node_name;
UnreadyHostState unready_host_state;
unready_host_state.started = zk_nodes_set.contains(started_node_name);
/// Because we do retries everywhere we can't fully rely on ephemeral nodes anymore.
/// Though we recreate "alive" node when reconnecting it might be not enough and race condition is possible.
/// And everything we can do here - just retry.
/// In worst case when we won't manage to see the alive node for a long time we will just abort the backup.
unready_host_state.alive = zk_nodes_set.contains(alive_node_name);
if (!unready_host_state.alive)
{
LOG_TRACE(log, "Seems like host ({}) is dead. Will retry the check to confirm", host);
auto holder = with_retries.createRetriesControlHolder("readCurrentState::checkAliveNode");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
if (zookeeper->existsNoFailureInjection(alive_node_path))
{
unready_host_state.alive = true;
return;
}
// Retry with backoff. We also check whether it is last retry or no, because we won't to rethrow an exception.
if (!holder.retries_ctl.isLastRetry())
holder.retries_ctl.setKeeperError(Coordination::Error::ZNONODE, "There is no alive node for host {}. Will retry", host);
});
}
LOG_TRACE(log, "Host ({}) appeared to be {}", host, unready_host_state.alive ? "alive" : "dead");
state.unready_hosts.emplace(host, unready_host_state);
if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
state.host_terminated = host;
if (!unready_host_state.alive && !state.disconnected_host)
{
/// If the "alive" node doesn't exist then we don't have connection to the corresponding host.
/// This node is ephemeral so probably it will be recreated soon. We use zookeeper retries to wait.
/// In worst case when we won't manage to see the alive node for a long time we will just abort the backup.
state.disconnected_host = host;
String message;
if (unready_host_state.started)
message = fmt::format("Lost connection to host {}", host);
else
message = fmt::format("No connection to host {} yet", host);
if (!retries_ctl.isLastRetry())
message += ", will retry";
retries_ctl.setUserError(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, message);
}
}
}
if (state.host_terminated || !state.unready_hosts.empty())
if (state.disconnected_host || !state.unready_hosts.empty())
return state;
auto holder = with_retries.createRetriesControlHolder("waitImpl::collectStagesToWait");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
Strings results;
for (const auto & host : all_hosts)
results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
state.results = std::move(results);
});
Strings results;
for (const auto & host : all_hosts)
results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
state.results = std::move(results);
return state;
}
@ -229,7 +200,7 @@ Strings BackupCoordinationStageSync::waitImpl(
auto watch = std::make_shared<Poco::Event>();
Strings zk_nodes;
{
auto holder = with_retries.createRetriesControlHolder("waitImpl::getChildren");
auto holder = with_retries.createRetriesControlHolder("waitImpl");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
@ -237,12 +208,14 @@ Strings BackupCoordinationStageSync::waitImpl(
watch->reset();
/// Get zk nodes and subscribe on their changes.
zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
/// Read the current state of zk nodes.
state = readCurrentState(holder, zk_nodes, all_hosts, stage_to_wait);
});
}
/// Read and analyze the current state of zk nodes.
state = readCurrentState(zk_nodes, all_hosts, stage_to_wait);
if (state.error || state.host_terminated || state.unready_hosts.empty())
/// Analyze the current state of zk nodes.
if (state.error || state.disconnected_host || state.unready_hosts.empty())
break; /// Error happened or everything is ready.
/// Log that we will wait
@ -270,8 +243,8 @@ Strings BackupCoordinationStageSync::waitImpl(
state.error->second.rethrow();
/// Another host terminated without errors.
if (state.host_terminated)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Host {} suddenly stopped working", *state.host_terminated);
if (state.disconnected_host)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "No connection to host {}", *state.disconnected_host);
/// Something's unready, timeout is probably not enough.
if (!state.unready_hosts.empty())

View File

@ -29,7 +29,7 @@ private:
void createRootNodes();
struct State;
State readCurrentState(const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
State readCurrentState(WithRetries::RetriesControlHolder & retries_control_holder, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;