Better conditions when cancelling a backup.

This commit is contained in:
Vitaly Baranov 2024-11-17 17:33:38 +01:00
parent 61b3231a72
commit a7831c991f
2 changed files with 19 additions and 5 deletions

View File

@ -685,13 +685,13 @@ void BackupCoordinationStageSync::cancelQueryIfError()
{
std::lock_guard lock{mutex};
if (!state.host_with_error)
return;
if (state.host_with_error)
exception = state.hosts.at(*state.host_with_error).exception;
}
chassert(exception);
if (!exception)
return;
process_list_element->cancelQuery(false, exception);
state_changed.notify_all();
}
@ -741,6 +741,11 @@ void BackupCoordinationStageSync::cancelQueryIfDisconnectedTooLong()
if (!exception)
return;
/// In this function we only pass the new `exception` (about that the connection was lost) to `process_list_element`.
/// We don't try to create the 'error' node here (because this function is called from watchingThread() and
/// we don't want the watching thread to try waiting here for retries or a reconnection).
/// Also we don't set the `state.host_with_error` field here because `state.host_with_error` can only be set
/// AFTER creating the 'error' node (see the comment for `State`).
process_list_element->cancelQuery(false, exception);
state_changed.notify_all();
}
@ -870,6 +875,9 @@ bool BackupCoordinationStageSync::checkIfHostsReachStage(const Strings & hosts,
continue;
}
if (state.host_with_error)
std::rethrow_exception(state.hosts.at(*state.host_with_error).exception);
if (host_info.finished)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"{} finished without coming to stage {}", getHostDesc(host), stage_to_wait);
@ -1150,6 +1158,9 @@ bool BackupCoordinationStageSync::checkIfOtherHostsFinish(
if ((host == current_host) || host_info.finished)
continue;
if (throw_if_error && state.host_with_error)
std::rethrow_exception(state.hosts.at(*state.host_with_error).exception);
String reason_text = reason.empty() ? "" : (" " + reason);
String host_status;

View File

@ -197,6 +197,9 @@ private:
};
/// Information about all the host participating in the current BACKUP or RESTORE operation.
/// This information is read from ZooKeeper.
/// To simplify the programming logic `state` can only be updated AFTER changing corresponding nodes in ZooKeeper
/// (for example, first we create the 'error' node, and only after that we set or read from ZK the `state.host_with_error` field).
struct State
{
std::map<String /* host */, HostInfo> hosts; /// std::map because we need to compare states