ClickHouse/src/Storages/MergeTree/LeaderElection.h

156 lines
4.6 KiB
C++
Raw Normal View History

2014-04-04 10:37:33 +00:00
#pragma once
#include <algorithm>
#include <atomic>
#include <functional>
#include <memory>
#include <string>
#include <utility>

#include <common/logger_useful.h>
#include <Common/CurrentMetrics.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Core/BackgroundSchedulePool.h>
2016-10-24 13:47:15 +00:00
/// Profile event incremented by LeaderElection each time leadership is acquired.
/// Only declared here (extern); the definition lives in another translation unit.
namespace ProfileEvents
{
    extern const Event LeaderElectionAcquiredLeadership;
}
/// Metric held (via CurrentMetrics::Increment) by every LeaderElection instance.
/// Only declared here (extern); the definition lives in another translation unit.
namespace CurrentMetrics
{
    extern const Metric LeaderElection;
}
2014-04-04 10:37:33 +00:00
namespace zkutil
{
2020-06-12 19:19:01 +00:00
/** Initially was used to implement the leader election algorithm described here:
  * http://zookeeper.apache.org/doc/r3.4.5/recipes.html#sc_leaderElection
  *
  * But then we decided to get rid of leader election, so every replica can become leader.
  * For now, every replica can become leader if there is no leader among replicas with old version.
  *
  * It's tempting to remove this class altogether, but we have to keep it
  * to maintain compatibility when replicas with different versions work on the same cluster
  * (this is allowed for a short time period during cluster update).
  *
  * Replicas with old versions participate in leader election with ephemeral sequential nodes.
  * If the node is first, then the replica is the leader.
  * Replicas with new versions create persistent sequential nodes.
  * If the first node is persistent, then all replicas with new versions become leaders.
  */
class LeaderElection
{
public:
using LeadershipHandler = std::function<void()>;
/** handler is called when this instance become leader.
*
* identifier - if not empty, must uniquely (within same path) identify participant of leader election.
* It means that different participants of leader election have different identifiers
2018-04-05 20:47:06 +00:00
* and existence of more than one ephemeral node with same identifier indicates an error.
*/
2020-06-12 19:02:00 +00:00
LeaderElection(
DB::BackgroundSchedulePool & pool_,
const std::string & path_,
ZooKeeper & zookeeper_,
LeadershipHandler handler_,
const std::string & identifier_)
: pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_)
, log_name("LeaderElection (" + path + ")")
2020-05-30 21:57:37 +00:00
, log(&Poco::Logger::get(log_name))
{
task = pool.createTask(log_name, [this] { threadFunction(); });
createNode();
}
void shutdown()
{
if (shutdown_called)
return;
shutdown_called = true;
task->deactivate();
}
~LeaderElection()
{
2020-06-12 19:38:36 +00:00
shutdown();
}
2014-04-04 10:37:33 +00:00
private:
DB::BackgroundSchedulePool & pool;
DB::BackgroundSchedulePool::TaskHolder task;
2020-06-12 19:19:01 +00:00
const std::string path;
ZooKeeper & zookeeper;
LeadershipHandler handler;
std::string identifier;
std::string log_name;
2020-05-30 21:57:37 +00:00
Poco::Logger * log;
std::atomic<bool> shutdown_called {false};
CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection};
void createNode()
{
shutdown_called = false;
2020-06-12 20:32:31 +00:00
/// If there is at least one persistent node, we don't have to create another.
Strings children = zookeeper.getChildren(path);
for (const auto & child : children)
{
Coordination::Stat stat;
zookeeper.get(path + "/" + child, &stat);
if (!stat.ephemeralOwner)
2020-06-12 20:42:31 +00:00
{
ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership);
handler();
2020-06-12 20:32:31 +00:00
return;
2020-06-12 20:42:31 +00:00
}
2020-06-12 20:32:31 +00:00
}
2020-06-12 19:19:01 +00:00
zookeeper.create(path + "/leader_election-", identifier, CreateMode::PersistentSequential);
task->activateAndSchedule();
}
void threadFunction()
{
try
{
Strings children = zookeeper.getChildren(path);
2020-06-12 19:19:01 +00:00
if (children.empty())
throw Poco::Exception("Assertion failed in LeaderElection");
2020-06-12 19:19:01 +00:00
std::sort(children.begin(), children.end());
Coordination::Stat stat;
zookeeper.get(path + "/" + children.front(), &stat);
if (!stat.ephemeralOwner)
{
2020-06-12 20:42:31 +00:00
/// It is persistent node - we can become leader.
ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership);
handler();
return;
}
}
catch (const KeeperException & e)
{
DB::tryLogCurrentException(log);
if (e.code == Coordination::Error::ZSESSIONEXPIRED)
return;
}
catch (...)
{
DB::tryLogCurrentException(log);
}
2020-06-12 19:19:01 +00:00
task->scheduleAfter(10 * 1000);
}
2014-04-04 10:37:33 +00:00
};
/// Shared-ownership pointer to a LeaderElection instance.
using LeaderElectionPtr = std::shared_ptr<LeaderElection>;
2014-04-04 10:37:33 +00:00
}