CLICKHOUSE-3847 fix bug when a replica recovers and its log_pointer will be deleted

This commit is contained in:
VadimPE 2018-08-27 15:09:22 +03:00
parent e7d6bc3dbc
commit cf0a7c1529
2 changed files with 35 additions and 8 deletions

View File

@ -82,6 +82,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &stat); Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &stat);
UInt64 min_saved_log_pointer = std::numeric_limits<UInt64>::max(); UInt64 min_saved_log_pointer = std::numeric_limits<UInt64>::max();
UInt64 min_inactive_log_pointer = std::numeric_limits<UInt64>::max();
Strings entries = zookeeper->getChildren(storage.zookeeper_path + "/log"); Strings entries = zookeeper->getChildren(storage.zookeeper_path + "/log");
@ -94,6 +95,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
? entries.size() - storage.data.settings.max_replicated_logs_to_keep.value ? entries.size() - storage.data.settings.max_replicated_logs_to_keep.value
: 0]; : 0];
std::unordered_set<String> recovering_replicas;
std::unordered_map<String, UInt32> host_versions_inactive_replicas; std::unordered_map<String, UInt32> host_versions_inactive_replicas;
std::unordered_map<String, String> log_pointers_lost_replicas; std::unordered_map<String, String> log_pointers_lost_replicas;
@ -111,13 +114,25 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
/// Check status of replica (active or not). /// Check status of replica (active or not).
/// If replica was not active, we could check when it's log_pointer locates. /// If replica was not active, we could check when it's log_pointer locates.
String res;
bool new_version_of_replica = zookeeper->tryGet(storage.zookeeper_path + "/replicas/" + replica + "/is_lost", res);
if (zookeeper->exists(storage.zookeeper_path + "/replicas/" + replica + "/is_active")) if (zookeeper->exists(storage.zookeeper_path + "/replicas/" + replica + "/is_active"))
if (new_version_of_replica && res == "1")
recovering_replicas.insert(replica);
else
min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer); min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer);
else else
{ {
String res; if (!new_version_of_replica)
if (!zookeeper->tryGet(storage.zookeeper_path + "/replicas/" + replica + "/is_lost", res)) {
/// Only to support old versions of ClickHouse.
/// If the replica does not have "/is_lost", we must save its log_pointer,
/// because old versions of ClickHouse cannot work with recovering.
min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer); min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer);
}
else else
if (res == "0") if (res == "0")
{ {
@ -128,15 +143,27 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
{ {
host_versions_inactive_replicas[replica] = host_stat.version; host_versions_inactive_replicas[replica] = host_stat.version;
log_pointers_lost_replicas[replica] = log_pointer_str; log_pointers_lost_replicas[replica] = log_pointer_str;
min_inactive_log_pointer = std::min(min_saved_log_pointer, log_pointer);
} }
} }
/// Only to support old versions CH.
else else
++replicas_were_marked_is_lost; ++replicas_were_marked_is_lost;
host_versions_inactive_replicas[replica] = host_stat.version; host_versions_inactive_replicas[replica] = host_stat.version;
} }
} }
/// We must check the log_pointer of recovering replicas at the end,
/// because the log pointer of a recovering replica can move backward.
for (const String & replica : recovering_replicas)
{
String pointer = zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer");
UInt32 log_pointer = parse<UInt64>(pointer);
min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer);
}
if (recovering_replicas.size() != 0)
min_saved_log_pointer = std::min(min_saved_log_pointer, min_inactive_log_pointer);
/// We will not touch the last `min_replicated_logs_to_keep` records. /// We will not touch the last `min_replicated_logs_to_keep` records.
entries.erase(entries.end() - std::min(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end()); entries.erase(entries.end() - std::min(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end());
/// We will not touch records that are no less than `min_pointer_active_replica`. /// We will not touch records that are no less than `min_pointer_active_replica`.

View File

@ -1999,7 +1999,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, zku
if (error == ZooKeeperImpl::ZooKeeper::ZBADVERSION) if (error == ZooKeeperImpl::ZooKeeper::ZBADVERSION)
throw Exception("Can not clone replica, because a " + source_path + " became lost", ErrorCodes::REPLICA_STATUS_CHANGED); throw Exception("Can not clone replica, because a " + source_path + " became lost", ErrorCodes::REPLICA_STATUS_CHANGED);
else if (error == ZooKeeperImpl::ZooKeeper::ZNODEEXISTS) else if (error == ZooKeeperImpl::ZooKeeper::ZNODEEXISTS)
throw Exception("Can not clone replica, because the clickHouse server updated to new version", ErrorCodes::REPLICA_STATUS_CHANGED); throw Exception("Can not clone replica, because the ClickHouse server updated to new version", ErrorCodes::REPLICA_STATUS_CHANGED);
else else
zkutil::KeeperMultiException::check(error, ops, resp); zkutil::KeeperMultiException::check(error, ops, resp);