ClickHouse/libs/libzkutil/src/ZooKeeper.cpp

589 lines
16 KiB
C++
Raw Normal View History

2014-03-07 13:50:58 +00:00
#include <zkutil/ZooKeeper.h>
#include <boost/make_shared.hpp>
2014-03-22 14:44:44 +00:00
#include <Yandex/logger_useful.h>
#include <DB/Common/ProfileEvents.h>
#include <boost/bind.hpp>
2014-03-07 13:50:58 +00:00
namespace zkutil
{
/// Node creation modes, expressed as combinations of the ZooKeeper C client flags
/// ZOO_EPHEMERAL and ZOO_SEQUENCE (0 means neither flag is set).
const int CreateMode::Persistent = 0;
const int CreateMode::Ephemeral = ZOO_EPHEMERAL;
const int CreateMode::EphemeralSequential = ZOO_EPHEMERAL | ZOO_SEQUENCE;
const int CreateMode::PersistentSequential = ZOO_SEQUENCE;
/// Throws KeeperException unless `code` is ZOK.
/// The path is attached to the exception only when it is non-empty.
void check(int32_t code, const std::string path = "")
{
    if (code == ZOK)
        return;

    if (path.empty())
        throw KeeperException(code);

    throw KeeperException(code, path);
}
struct WatchWithEvent
2014-03-07 13:50:58 +00:00
{
/// существует все время существования WatchWithEvent
ZooKeeper & zk;
EventPtr event;
WatchWithEvent(ZooKeeper & zk_, EventPtr event_) : zk(zk_), event(event_) {}
2014-03-07 13:50:58 +00:00
void process(zhandle_t * zh, int32_t event_type, int32_t state, const char * path)
{
if (event)
{
event->set();
event = nullptr;
}
}
};
2014-03-07 13:50:58 +00:00
/// Watcher callback given to the ZooKeeper C client. `watcherCtx` is either null or a WatchWithEvent
/// created by watchForEvent(); the watch is notified and, for non-session events, unregistered and freed.
void ZooKeeper::processEvent(zhandle_t * zh, int type, int state, const char * path, void *watcherCtx)
{
    if (!watcherCtx)
        return;

    WatchWithEvent * fired_watch = static_cast<WatchWithEvent *>(watcherCtx);
    fired_watch->process(zh, type, state, path);

    /// It is guaranteed that a non-ZOO_SESSION_EVENT event arrives exactly once (https://issues.apache.org/jira/browse/ZOOKEEPER-890).
    /// A session event may come again, so the watch has to stay alive in that case.
    if (type == ZOO_SESSION_EVENT)
        return;

    {
        Poco::ScopedLock<Poco::FastMutex> lock(fired_watch->zk.mutex);
        fired_watch->zk.watch_store.erase(fired_watch);
    }
    delete fired_watch;
}
2014-03-07 13:50:58 +00:00
void ZooKeeper::init(const std::string & hosts_, int32_t sessionTimeoutMs_)
2014-03-07 13:50:58 +00:00
{
2014-06-17 11:41:14 +00:00
log = &Logger::get("ZooKeeper");
2014-06-30 18:12:18 +00:00
zoo_set_debug_level(ZOO_LOG_LEVEL_ERROR);
2014-04-25 13:55:15 +00:00
hosts = hosts_;
sessionTimeoutMs = sessionTimeoutMs_;
impl = zookeeper_init(hosts.c_str(), nullptr, sessionTimeoutMs, nullptr, nullptr, 0);
ProfileEvents::increment(ProfileEvents::ZooKeeperInit);
if (!impl)
throw KeeperException("Fail to initialize zookeeper. Hosts are " + hosts);
2014-03-07 13:50:58 +00:00
default_acl = &ZOO_OPEN_ACL_UNSAFE;
2014-03-07 13:50:58 +00:00
}
/// Opens a session to the given comma-separated host list; all work is delegated to init().
ZooKeeper::ZooKeeper(const std::string & hosts, int32_t sessionTimeoutMs)
{
    this->init(hosts, sessionTimeoutMs);
}
/// Connection parameters parsed from a configuration subtree.
/// Every child key starting with "node" contributes a "host:port" entry to `hosts`;
/// "session_timeout_ms" overrides the default timeout; any other key is an error.
struct ZooKeeperArgs
{
    ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, const std::string & config_name)
    {
        Poco::Util::AbstractConfiguration::Keys child_keys;
        config.keys(config_name, child_keys);

        const std::string node_prefix = "node";
        session_timeout_ms = DEFAULT_SESSION_TIMEOUT;

        for (const auto & key : child_keys)
        {
            const std::string full_key = config_name + "." + key;

            /// A prefix match also covers the case when the key is exactly "node".
            const bool is_node = key.compare(0, node_prefix.size(), node_prefix) == 0;

            if (is_node)
            {
                if (!hosts.empty())
                    hosts += std::string(",");
                hosts += config.getString(full_key + ".host") + ":" + config.getString(full_key + ".port");
            }
            else if (key == "session_timeout_ms")
                session_timeout_ms = config.getInt(full_key);
            else
                throw KeeperException(std::string("Unknown key ") + key + " in config file");
        }
    }

    std::string hosts;
    size_t session_timeout_ms;
};
/// Opens a session using the hosts and timeout read from the `config_name` subtree of `config`.
ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name)
{
    const ZooKeeperArgs parsed(config, config_name);
    init(parsed.hosts, parsed.session_timeout_ms);
}
/// Builds the watcher context for `event`, or returns nullptr when no event was requested.
/// The context is registered in watch_store; it is released either in processEvent or in the destructor.
void * ZooKeeper::watchForEvent(EventPtr event)
{
    if (!event)
        return nullptr;

    WatchWithEvent * watch_ctx = new WatchWithEvent(*this, event);
    {
        Poco::ScopedLock<Poco::FastMutex> lock(mutex);
        watch_store.insert(watch_ctx);

        /// Report every time the number of live watches reaches a multiple of 10000.
        if (watch_store.size() % 10000 == 0)
            LOG_ERROR(log, "There are " << watch_store.size() << " active watches. There must be a leak somewhere.");
    }
    return static_cast<void *>(watch_ctx);
}
/// Returns the watcher callback to pass to the C client: processEvent if an event is given, nullptr otherwise.
watcher_fn ZooKeeper::callbackForEvent(EventPtr event)
{
    if (event)
        return processEvent;
    return nullptr;
}
int32_t ZooKeeper::getChildrenImpl(const std::string & path, Strings & res,
Stat * stat_,
EventPtr watch)
{
String_vector strings;
int code;
Stat stat;
code = zoo_wget_children2(impl, path.c_str(), callbackForEvent(watch), watchForEvent(watch), &strings, &stat);
ProfileEvents::increment(ProfileEvents::ZooKeeperGetChildren);
ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
if (code == ZOK)
{
if (stat_)
*stat_ = stat;
res.resize(strings.count);
for (int i = 0; i < strings.count; ++i)
res[i] = std::string(strings.data[i]);
deallocate_String_vector(&strings);
}
return code;
}
/// Returns the children of `path`. Throws KeeperException (through check) on any result other than ZOK.
Strings ZooKeeper::getChildren(
    const std::string & path, Stat * stat, EventPtr watch)
{
    Strings children;
    const int32_t code = tryGetChildren(path, children, stat, watch);
    check(code, path);
    return children;
}
int32_t ZooKeeper::tryGetChildren(const std::string & path, Strings & res,
Stat * stat_, EventPtr watch)
2014-03-07 17:57:53 +00:00
{
int32_t code = retry(boost::bind(&ZooKeeper::getChildrenImpl, this, boost::ref(path), boost::ref(res), stat_, watch));
if (!( code == ZOK ||
code == ZNONODE))
throw KeeperException(code, path);
return code;
2014-03-07 17:57:53 +00:00
}
int32_t ZooKeeper::createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & pathCreated)
2014-03-07 17:57:53 +00:00
{
int code;
/// имя ноды может быть больше переданного пути, если создается sequential нода.
size_t name_buffer_size = path.size() + SEQUENTIAL_SUFFIX_SIZE;
char * name_buffer = new char[name_buffer_size];
code = zoo_create(impl, path.c_str(), data.c_str(), data.size(), getDefaultACL(), mode, name_buffer, name_buffer_size);
ProfileEvents::increment(ProfileEvents::ZooKeeperCreate);
ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
2014-03-21 18:58:24 +00:00
if (code == ZOK)
{
pathCreated = std::string(name_buffer);
}
delete[] name_buffer;
return code;
}
/// Creates a node and returns its actual path. Throws KeeperException (through check) unless the result is ZOK.
std::string ZooKeeper::create(const std::string & path, const std::string & data, int32_t type)
{
    std::string created_path;
    const int32_t code = tryCreate(path, data, type, created_path);
    check(code, path);
    return created_path;
}
/// Single attempt to create a node. Returns ZOK, ZNONODE, ZNODEEXISTS or ZNOCHILDRENFOREPHEMERALS;
/// every other result code is thrown as KeeperException.
int32_t ZooKeeper::tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & pathCreated)
{
    const int code = createImpl(path, data, mode, pathCreated);

    const bool expected =
        code == ZOK
        || code == ZNONODE
        || code == ZNODEEXISTS
        || code == ZNOCHILDRENFOREPHEMERALS;

    if (!expected)
        throw KeeperException(code, path);

    return code;
}
int32_t ZooKeeper::tryCreate(const std::string & path, const std::string & data, int32_t mode)
2014-05-07 13:58:20 +00:00
{
std::string pathCreated;
return tryCreate(path, data, mode, pathCreated);
2014-05-07 13:58:20 +00:00
}
2014-07-03 17:24:17 +00:00
/// Runs the four-argument tryCreate through retry(); `attempt` is forwarded to retry() as is.
int32_t ZooKeeper::tryCreateWithRetries(const std::string& path, const std::string& data, int32_t mode, std::string& pathCreated, size_t* attempt)
{
    return retry(
        boost::bind(&ZooKeeper::tryCreate, this, boost::ref(path), boost::ref(data), mode, boost::ref(pathCreated)),
        attempt);
}
/// Creates a persistent node through retry(). An already existing node (ZNODEEXISTS) is not an error;
/// any result other than ZOK or ZNODEEXISTS is thrown as KeeperException.
void ZooKeeper::createIfNotExists(const std::string & path, const std::string & data)
{
    std::string created_path;
    const int32_t code = retry(boost::bind(&ZooKeeper::createImpl, this, boost::ref(path), boost::ref(data), zkutil::CreateMode::Persistent, boost::ref(created_path)));

    if (code != ZOK && code != ZNODEEXISTS)
        throw KeeperException(code, path);
}
2014-08-11 14:05:38 +00:00
/// Creates (if missing) every proper ancestor of `path`, from the top down, with empty data.
/// The search for '/' starts at index 1, so a leading slash does not produce an empty prefix.
/// The node `path` itself is not created.
void ZooKeeper::createAncestors(const std::string & path)
{
    for (size_t slash = path.find('/', 1);
         slash != std::string::npos;
         slash = path.find('/', slash + 1))
    {
        createIfNotExists(path.substr(0, slash), "");
    }
}
/// One attempt to delete node `path` at the given version. Returns the ZooKeeper result code unchanged.
int32_t ZooKeeper::removeImpl(const std::string & path, int32_t version)
{
    const int32_t result = zoo_delete(impl, path.c_str(), version);

    ProfileEvents::increment(ProfileEvents::ZooKeeperRemove);
    ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);

    return result;
}
2014-03-07 17:57:53 +00:00
/// Deletes node `path`. Throws KeeperException (through check) unless the result is ZOK.
void ZooKeeper::remove(const std::string & path, int32_t version)
{
    const int32_t code = tryRemove(path, version);
    check(code, path);
}
int32_t ZooKeeper::tryRemove(const std::string & path, int32_t version)
2014-03-07 17:57:53 +00:00
{
int32_t code = removeImpl(path, version);
if (!( code == ZOK ||
code == ZNONODE ||
code == ZBADVERSION ||
code == ZNOTEMPTY))
throw KeeperException(code, path);
return code;
2014-03-07 17:57:53 +00:00
}
2014-07-03 17:24:17 +00:00
int32_t ZooKeeper::tryRemoveWithRetries(const std::string & path, int32_t version, size_t * attempt)
{
2014-07-03 17:24:17 +00:00
int32_t code = retry(boost::bind(&ZooKeeper::removeImpl, this, boost::ref(path), version), attempt);
if (!( code == ZOK ||
code == ZNONODE ||
code == ZBADVERSION ||
code == ZNOTEMPTY))
throw KeeperException(code, path);
return code;
}
int32_t ZooKeeper::existsImpl(const std::string & path, Stat * stat_, EventPtr watch)
2014-03-07 17:57:53 +00:00
{
int32_t code;
Stat stat;
code = zoo_wexists(impl, path.c_str(), callbackForEvent(watch), watchForEvent(watch), &stat);
ProfileEvents::increment(ProfileEvents::ZooKeeperExists);
ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
if (code == ZOK)
{
if (stat_)
*stat_ = stat;
}
return code;
}
/// Checks through retry() whether node `path` exists, optionally filling `*stat_` and setting a watch.
/// Returns true on ZOK and false on ZNONODE; every other result code is thrown as KeeperException.
bool ZooKeeper::exists(const std::string & path, Stat * stat_, EventPtr watch)
{
    /// The path is bound by reference, like in the other retry(boost::bind(...)) calls of this class,
    /// so that the string is not copied into the bound functor.
    int32_t code = retry(boost::bind(&ZooKeeper::existsImpl, this, boost::ref(path), stat_, watch));

    if (!(code == ZOK || code == ZNONODE))
        throw KeeperException(code, path);

    return code != ZNONODE;
}
/// One attempt to read the data of node `path`, optionally setting a watch.
/// On ZOK stores the node data into `res` and, if `stat_` is not null, copies the node Stat to `*stat_`.
/// Returns the ZooKeeper result code unchanged.
int32_t ZooKeeper::getImpl(const std::string & path, std::string & res, Stat * stat_, EventPtr watch)
{
    char buffer[MAX_NODE_SIZE];
    int buffer_len = MAX_NODE_SIZE;
    Stat stat;

    int32_t code = zoo_wget(impl, path.c_str(), callbackForEvent(watch), watchForEvent(watch), buffer, &buffer_len, &stat);
    ProfileEvents::increment(ProfileEvents::ZooKeeperGet);
    ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);

    if (code == ZOK)
    {
        if (stat_)
            *stat_ = stat;

        /// The C client reports a node whose data is NULL by setting the length to -1.
        /// Converted to size_t this would become a huge length, so NULL data is treated as an empty string.
        if (buffer_len < 0)
            res.clear();
        else
            res.assign(buffer, buffer_len);
    }
    return code;
}
std::string ZooKeeper::get(const std::string & path, Stat * stat, EventPtr watch)
2014-03-07 17:57:53 +00:00
{
std::string res;
if (tryGet(path, res, stat, watch))
return res;
else
2014-07-01 15:58:25 +00:00
throw KeeperException("Can't get data for node " + path + ": node doesn't exist");
2014-03-07 17:57:53 +00:00
}
bool ZooKeeper::tryGet(const std::string & path, std::string & res, Stat * stat_, EventPtr watch)
2014-03-07 17:57:53 +00:00
{
int32_t code = retry(boost::bind(&ZooKeeper::getImpl, this, boost::ref(path), boost::ref(res), stat_, watch));
if (!( code == ZOK ||
code == ZNONODE))
throw KeeperException(code, path);
return code == ZOK;
2014-03-07 13:50:58 +00:00
}
int32_t ZooKeeper::setImpl(const std::string & path, const std::string & data,
int32_t version, Stat * stat_)
{
Stat stat;
int32_t code = zoo_set2(impl, path.c_str(), data.c_str(), data.length(), version, &stat);
ProfileEvents::increment(ProfileEvents::ZooKeeperSet);
ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
if (code == ZOK)
{
if (stat_)
*stat_ = stat;
}
return code;
}
2014-03-07 17:57:53 +00:00
/// Writes `data` into node `path`. Throws KeeperException (through check) unless the result is ZOK.
void ZooKeeper::set(const std::string & path, const std::string & data, int32_t version, Stat * stat)
{
    const int32_t code = trySet(path, data, version, stat);
    check(code, path);
}
/// Single attempt to write node data. Returns ZOK, ZNONODE or ZBADVERSION;
/// every other result code is thrown as KeeperException.
int32_t ZooKeeper::trySet(const std::string & path, const std::string & data,
                          int32_t version, Stat * stat_)
{
    const int32_t code = setImpl(path, data, version, stat_);

    if (code != ZOK && code != ZNONODE && code != ZBADVERSION)
        throw KeeperException(code, path);

    return code;
}
int32_t ZooKeeper::multiImpl(const Ops & ops_, OpResultsPtr * out_results_)
{
2014-07-28 14:31:07 +00:00
if (ops_.empty())
return ZOK;
/// Workaround ошибки в сишном клиенте ZooKeeper. Если сессия истекла, zoo_multi иногда падает с segfault.
/// Наверно, здесь есть race condition, и возможен segfault, если сессия истечет между этой проверкой и zoo_multi.
/// TODO: Посмотреть, не исправлено ли это в последней версии клиента, и исправить.
if (expired())
return ZINVALIDSTATE;
size_t count = ops_.size();
OpResultsPtr out_results(new OpResults(count));
/// копируем структуру, содержащую указатели, дефолтным конструктором копирования
/// это безопасно, т.к. у нее нет деструктора
std::vector<zoo_op_t> ops;
for (const Op & op : ops_)
ops.push_back(*(op.data));
int32_t code = zoo_multi(impl, ops.size(), ops.data(), out_results->data());
ProfileEvents::increment(ProfileEvents::ZooKeeperMulti);
ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
if (out_results_)
*out_results_ = out_results;
return code;
}
/// Executes the operations and returns their results.
/// Throws KeeperException (through check, without a path) unless the result is ZOK.
OpResultsPtr ZooKeeper::multi(const Ops & ops)
{
    OpResultsPtr op_results;
    const int32_t code = tryMulti(ops, &op_results);
    check(code);
    return op_results;
}
int32_t ZooKeeper::tryMulti(const Ops & ops_, OpResultsPtr * out_results_)
{
int32_t code = multiImpl(ops_, out_results_);
if (!(code == ZOK ||
code == ZNONODE ||
code == ZNODEEXISTS ||
code == ZNOCHILDRENFOREPHEMERALS ||
code == ZBADVERSION ||
code == ZNOTEMPTY))
2014-04-09 15:52:47 +00:00
throw KeeperException(code);
return code;
}
/// Executes the operations through retry(); `attempt` is forwarded to retry() as is.
/// Accepts the same set of result codes as tryMulti; every other code is thrown as KeeperException.
int32_t ZooKeeper::tryMultiWithRetries(const Ops & ops, OpResultsPtr * out_results, size_t * attempt)
{
    const int32_t code = retry(boost::bind(&ZooKeeper::multiImpl, this, boost::ref(ops), out_results), attempt);

    const bool expected =
        code == ZOK
        || code == ZNONODE
        || code == ZNODEEXISTS
        || code == ZNOCHILDRENFOREPHEMERALS
        || code == ZBADVERSION
        || code == ZNOTEMPTY;

    if (!expected)
        throw KeeperException(code);

    return code;
}
/// Maximum number of child removals put into one multi-operation by the recursive removal functions below.
static const int BATCH_SIZE = 100;
2014-05-27 12:08:40 +00:00
void ZooKeeper::removeChildrenRecursive(const std::string & path)
2014-03-22 14:44:44 +00:00
{
Strings children = getChildren(path);
while (!children.empty())
{
zkutil::Ops ops;
for (size_t i = 0; i < BATCH_SIZE && !children.empty(); ++i)
{
removeChildrenRecursive(path + "/" + children.back());
ops.push_back(new Op::Remove(path + "/" + children.back(), -1));
children.pop_back();
}
multi(ops);
}
}
2014-07-07 09:51:42 +00:00
void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path)
{
Strings children;
if (tryGetChildren(path, children) != ZOK)
return;
while (!children.empty())
2014-07-07 09:51:42 +00:00
{
zkutil::Ops ops;
Strings batch;
for (size_t i = 0; i < BATCH_SIZE && !children.empty(); ++i)
{
batch.push_back(path + "/" + children.back());
children.pop_back();
tryRemoveChildrenRecursive(batch.back());
ops.push_back(new Op::Remove(batch.back(), -1));
}
2014-07-07 09:51:42 +00:00
/** Сначала пытаемся удалить детей более быстрым способом - сразу пачкой. Если не получилось,
* значит кто-то кроме нас удаляет этих детей, и придется удалять их по одному.
*/
if (tryMulti(ops) != ZOK)
2014-07-07 09:51:42 +00:00
{
for (const std::string & child : batch)
{
tryRemove(child);
}
2014-07-07 09:51:42 +00:00
}
}
}
void ZooKeeper::removeRecursive(const std::string & path)
{
removeChildrenRecursive(path);
2014-03-22 14:44:44 +00:00
remove(path);
}
2014-07-07 09:51:42 +00:00
void ZooKeeper::tryRemoveRecursive(const std::string & path)
{
tryRemoveChildrenRecursive(path);
tryRemove(path);
}
2014-06-30 07:58:16 +00:00
/// Closes the session and frees every watch context still registered in watch_store.
/// A failure to close the session is only logged.
ZooKeeper::~ZooKeeper()
{
    auto destructor_log = &Logger::get("~ZooKeeper");

    LOG_INFO(destructor_log, "Closing ZooKeeper session");

    const int close_code = zookeeper_close(impl);
    if (close_code != ZOK)
        LOG_ERROR(destructor_log, "Failed to close ZooKeeper session: " << zerror(close_code));

    LOG_INFO(destructor_log, "Removing " << watch_store.size() << " watches");

    /// Delete the WatchWithEvent objects that will never be processed anymore.
    for (WatchWithEvent * pending_watch : watch_store)
        delete pending_watch;

    LOG_INFO(destructor_log, "Removed watches");
}
2014-04-25 13:55:15 +00:00
/// Creates a new ZooKeeper object connected with the same hosts and session timeout as this one.
ZooKeeperPtr ZooKeeper::startNewSession() const
{
    ZooKeeperPtr fresh_session = new ZooKeeper(hosts, sessionTimeoutMs);
    return fresh_session;
}
/// Prepares a "create" operation for a multi-request.
/// The operation keeps its own copies of the path and the value, plus a buffer for the created path
/// that leaves room for a sequential suffix; the C structure is initialised to point at them.
Op::Create::Create(const std::string & path_, const std::string & value_, AclPtr acl, int32_t flags)
    : path(path_),
      value(value_),
      created_path(path.size() + ZooKeeper::SEQUENTIAL_SUFFIX_SIZE)
{
    zoo_create_op_init(
        data.get(),
        path.c_str(),
        value.c_str(), value.size(),
        acl,
        flags,
        created_path.data(), created_path.size());
}
/// Returns the ACL used for newly created nodes; the read is done under `mutex`.
AclPtr ZooKeeper::getDefaultACL()
{
    Poco::ScopedLock<Poco::FastMutex> lock(mutex);
    AclPtr current_acl = default_acl;
    return current_acl;
}
/// Replaces the ACL used for newly created nodes; the write is done under `mutex`.
void ZooKeeper::setDefaultACL(AclPtr new_acl)
{
    Poco::ScopedLock<Poco::FastMutex> lock(mutex);
    this->default_acl = new_acl;
}
2014-06-04 13:51:40 +00:00
/// Returns the text that the C client's zerror() gives for `code`.
std::string ZooKeeper::error2string(int32_t code)
{
    const char * message = zerror(code);
    return message;
}
/// Returns the current state of the client handle as reported by zoo_state().
int ZooKeeper::state()
{
    const int current_state = zoo_state(impl);
    return current_state;
}
/// True if the client handle is in ZOO_EXPIRED_SESSION_STATE.
bool ZooKeeper::expired()
{
    const bool session_expired = (ZOO_EXPIRED_SESSION_STATE == state());
    return session_expired;
}
2014-07-03 17:24:17 +00:00
/// Returns the client_id field of the structure returned by zoo_client_id() for this handle.
int64_t ZooKeeper::getClientID()
{
    auto client = zoo_client_id(impl);
    return client->client_id;
}
2014-03-07 17:57:53 +00:00
}