2014-04-04 10:37:33 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-06-19 20:06:35 +00:00
|
|
|
#include <Common/ZooKeeper/ZooKeeper.h>
|
2014-04-04 10:37:33 +00:00
|
|
|
#include <functional>
|
2016-05-28 17:31:50 +00:00
|
|
|
#include <memory>
|
2015-09-29 19:19:54 +00:00
|
|
|
#include <common/logger_useful.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/CurrentMetrics.h>
|
2016-10-24 13:47:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace ProfileEvents
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const Event ObsoleteEphemeralNode;
|
|
|
|
extern const Event LeaderElectionAcquiredLeadership;
|
2016-10-24 13:47:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
namespace CurrentMetrics
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const Metric LeaderElection;
|
2016-10-24 13:47:15 +00:00
|
|
|
}
|
2014-04-04 10:37:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace zkutil
|
|
|
|
{
|
|
|
|
|
2016-10-23 03:46:51 +00:00
|
|
|
/** Implements leader election algorithm described here: http://zookeeper.apache.org/doc/r3.4.5/recipes.html#sc_leaderElection
|
2014-04-04 10:37:33 +00:00
|
|
|
*/
|
|
|
|
class LeaderElection
|
|
|
|
{
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
using LeadershipHandler = std::function<void()>;
|
|
|
|
|
|
|
|
/** handler is called when this instance become leader.
|
|
|
|
*
|
|
|
|
* identifier - if not empty, must uniquely (within same path) identify participant of leader election.
|
|
|
|
* It means that different participants of leader election have different identifiers
|
|
|
|
* and existence of more than one ephemeral node with same identifier indicates an error
|
|
|
|
* (see cleanOldEphemeralNodes).
|
|
|
|
*/
|
|
|
|
LeaderElection(const std::string & path_, ZooKeeper & zookeeper_, LeadershipHandler handler_, const std::string & identifier_ = "")
|
|
|
|
: path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_)
|
|
|
|
{
|
|
|
|
createNode();
|
|
|
|
}
|
|
|
|
|
|
|
|
void yield()
|
|
|
|
{
|
|
|
|
releaseNode();
|
|
|
|
createNode();
|
|
|
|
}
|
|
|
|
|
|
|
|
~LeaderElection()
|
|
|
|
{
|
|
|
|
releaseNode();
|
|
|
|
}
|
2014-04-04 10:37:33 +00:00
|
|
|
|
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string path;
|
|
|
|
ZooKeeper & zookeeper;
|
|
|
|
LeadershipHandler handler;
|
|
|
|
std::string identifier;
|
|
|
|
|
|
|
|
EphemeralNodeHolderPtr node;
|
|
|
|
std::string node_name;
|
|
|
|
|
|
|
|
std::thread thread;
|
|
|
|
std::atomic<bool> shutdown {false};
|
|
|
|
zkutil::EventPtr event = std::make_shared<Poco::Event>();
|
|
|
|
|
|
|
|
CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection};
|
|
|
|
|
|
|
|
void createNode()
|
|
|
|
{
|
|
|
|
shutdown = false;
|
|
|
|
node = EphemeralNodeHolder::createSequential(path + "/leader_election-", zookeeper, identifier);
|
|
|
|
|
|
|
|
std::string node_path = node->getPath();
|
|
|
|
node_name = node_path.substr(node_path.find_last_of('/') + 1);
|
|
|
|
|
|
|
|
cleanOldEphemeralNodes();
|
|
|
|
|
|
|
|
thread = std::thread(&LeaderElection::threadFunction, this);
|
|
|
|
}
|
|
|
|
|
|
|
|
void cleanOldEphemeralNodes()
|
|
|
|
{
|
|
|
|
if (identifier.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
/** If there are nodes with same identifier, remove them.
|
|
|
|
* Such nodes could still be alive after failed attempt of removal,
|
|
|
|
* if it was temporary communication failure, that was continued for more than session timeout,
|
|
|
|
* but ZK session is still alive for unknown reason, and someone still holds that ZK session.
|
|
|
|
* See comments in destructor of EphemeralNodeHolder.
|
|
|
|
*/
|
|
|
|
Strings brothers = zookeeper.getChildren(path);
|
|
|
|
for (const auto & brother : brothers)
|
|
|
|
{
|
|
|
|
if (brother == node_name)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
std::string brother_path = path + "/" + brother;
|
|
|
|
std::string brother_identifier = zookeeper.get(brother_path);
|
|
|
|
|
|
|
|
if (brother_identifier == identifier)
|
|
|
|
{
|
|
|
|
ProfileEvents::increment(ProfileEvents::ObsoleteEphemeralNode);
|
|
|
|
LOG_WARNING(&Logger::get("LeaderElection"), "Found obsolete ephemeral node for identifier "
|
|
|
|
+ identifier + ", removing: " + brother_path);
|
|
|
|
zookeeper.tryRemoveWithRetries(brother_path);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void releaseNode()
|
|
|
|
{
|
|
|
|
shutdown = true;
|
|
|
|
event->set();
|
|
|
|
if (thread.joinable())
|
|
|
|
thread.join();
|
|
|
|
node = nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
void threadFunction()
|
|
|
|
{
|
|
|
|
while (!shutdown)
|
|
|
|
{
|
|
|
|
bool success = false;
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
|
|
|
Strings children = zookeeper.getChildren(path);
|
|
|
|
std::sort(children.begin(), children.end());
|
|
|
|
auto it = std::lower_bound(children.begin(), children.end(), node_name);
|
|
|
|
if (it == children.end() || *it != node_name)
|
|
|
|
throw Poco::Exception("Assertion failed in LeaderElection");
|
|
|
|
|
|
|
|
if (it == children.begin())
|
|
|
|
{
|
|
|
|
ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership);
|
|
|
|
handler();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (zookeeper.exists(path + "/" + *(it - 1), nullptr, event))
|
|
|
|
event->wait();
|
|
|
|
|
|
|
|
success = true;
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
DB::tryLogCurrentException("LeaderElection");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!success)
|
|
|
|
event->tryWait(10 * 1000);
|
|
|
|
}
|
|
|
|
}
|
2014-04-04 10:37:33 +00:00
|
|
|
};
|
|
|
|
|
2016-05-28 17:31:50 +00:00
|
|
|
/// Shared-ownership pointer to a LeaderElection instance.
using LeaderElectionPtr = std::shared_ptr<LeaderElection>;
|
2014-04-04 10:37:33 +00:00
|
|
|
|
|
|
|
}
|