mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-10-18 22:40:50 +00:00
97f2a2213e
* Move some code outside dbms/src folder * Fix paths
271 lines
7.9 KiB
C++
271 lines
7.9 KiB
C++
#pragma once
|
|
|
|
#include <Core/Types.h>
|
|
#include <Common/ConcurrentBoundedQueue.h>
|
|
#include <Common/CurrentMetrics.h>
|
|
#include <Common/ThreadPool.h>
|
|
#include <Common/ZooKeeper/IKeeper.h>
|
|
|
|
#include <IO/ReadBuffer.h>
|
|
#include <IO/WriteBuffer.h>
|
|
#include <IO/ReadBufferFromPocoSocket.h>
|
|
#include <IO/WriteBufferFromPocoSocket.h>
|
|
|
|
#include <Poco/Net/StreamSocket.h>
|
|
#include <Poco/Net/SocketAddress.h>
|
|
|
|
#include <map>
|
|
#include <mutex>
|
|
#include <chrono>
|
|
#include <vector>
|
|
#include <memory>
|
|
#include <thread>
|
|
#include <atomic>
|
|
#include <cstdint>
|
|
#include <optional>
|
|
#include <functional>
|
|
|
|
|
|
/** ZooKeeper C++ library, a replacement for libzookeeper.
|
|
*
|
|
* Motivation.
|
|
*
|
|
* libzookeeper has many bugs:
|
|
* - segfaults: for example, if zookeeper connection was interrupted while reading result of multi response;
|
|
* - memory corruption: for example, as a result of double free inside libzookeeper;
|
|
* - no timeouts for synchronous operations: they may stuck forever under simple Jepsen-like tests;
|
|
* - logical errors: for example, chroot prefix is not removed from the results of multi responses.
|
|
* - data races;
|
|
*
|
|
* The code of libzookeeper is over complicated:
|
|
* - memory ownership is unclear and bugs are very difficult to track and fix.
|
|
* - extremely creepy code for implementation of "chroot" feature.
|
|
*
|
|
* As of 2018, there are no active maintainers of libzookeeper:
|
|
* - bugs in JIRA are fixed only occasionally with ad-hoc patches by library users.
|
|
*
|
|
* libzookeeper is a classical example of bad code written in C.
|
|
*
|
|
* In Go, Python and Rust programming languages,
|
|
* there are separate libraries for ZooKeeper, not based on libzookeeper.
|
|
* Motivation is almost the same. Example:
|
|
* https://github.com/python-zk/kazoo/blob/master/docs/implementation.rst
|
|
*
|
|
* About "session restore" feature.
|
|
*
|
|
* libzookeeper has the feature of session restore. Client receives session id and session token from the server,
|
|
* and when connection is lost, it can quickly reconnect to any server with the same session id and token,
|
|
* to continue with existing session.
|
|
* libzookeeper performs this reconnection automatically.
|
|
*
|
|
* This feature is proven to be harmful.
|
|
* For example, it makes very difficult to correctly remove ephemeral nodes.
|
|
* This may lead to weird bugs in application code.
|
|
* For example, our developers have found that type of bugs in Curator Java library.
|
|
*
|
|
* On the other side, session restore feature has no advantages,
|
|
* because every application should be able to establish new session and reinitialize internal state,
|
|
* when the session is lost and cannot be restored.
|
|
*
|
|
* This library never restores the session. In case of any error, the session is considered as expired
|
|
* and you should create a new instance of ZooKeeper object and reinitialize the application state.
|
|
*
|
|
* This library is not intended to be CPU efficient. Hundreds of thousands operations per second is usually enough.
|
|
*/
|
|
|
|
|
|
namespace CurrentMetrics
|
|
{
|
|
extern const Metric ZooKeeperSession;
|
|
}
|
|
|
|
|
|
namespace Coordination
|
|
{
|
|
|
|
using namespace DB;
|
|
|
|
struct ZooKeeperRequest;
|
|
|
|
|
|
/** Usage scenario: look at the documentation for IKeeper class.
|
|
*/
|
|
class ZooKeeper : public IKeeper
|
|
{
|
|
public:
|
|
using Addresses = std::vector<Poco::Net::SocketAddress>;
|
|
|
|
using XID = int32_t;
|
|
using OpNum = int32_t;
|
|
|
|
/** Connection to addresses is performed in order. If you want, shuffle them manually.
|
|
* Operation timeout couldn't be greater than session timeout.
|
|
* Operation timeout applies independently for network read, network write, waiting for events and synchronization.
|
|
*/
|
|
ZooKeeper(
|
|
const Addresses & addresses,
|
|
const String & root_path,
|
|
const String & auth_scheme,
|
|
const String & auth_data,
|
|
Poco::Timespan session_timeout_,
|
|
Poco::Timespan connection_timeout,
|
|
Poco::Timespan operation_timeout_);
|
|
|
|
~ZooKeeper() override;
|
|
|
|
|
|
/// If expired, you can only destroy the object. All other methods will throw exception.
|
|
bool isExpired() const override { return expired; }
|
|
|
|
/// Useful to check owner of ephemeral node.
|
|
int64_t getSessionID() const override { return session_id; }
|
|
|
|
|
|
/// See the documentation about semantics of these methods in IKeeper class.
|
|
|
|
void create(
|
|
const String & path,
|
|
const String & data,
|
|
bool is_ephemeral,
|
|
bool is_sequential,
|
|
const ACLs & acls,
|
|
CreateCallback callback) override;
|
|
|
|
void remove(
|
|
const String & path,
|
|
int32_t version,
|
|
RemoveCallback callback) override;
|
|
|
|
void exists(
|
|
const String & path,
|
|
ExistsCallback callback,
|
|
WatchCallback watch) override;
|
|
|
|
void get(
|
|
const String & path,
|
|
GetCallback callback,
|
|
WatchCallback watch) override;
|
|
|
|
void set(
|
|
const String & path,
|
|
const String & data,
|
|
int32_t version,
|
|
SetCallback callback) override;
|
|
|
|
void list(
|
|
const String & path,
|
|
ListCallback callback,
|
|
WatchCallback watch) override;
|
|
|
|
void check(
|
|
const String & path,
|
|
int32_t version,
|
|
CheckCallback callback) override;
|
|
|
|
void multi(
|
|
const Requests & requests,
|
|
MultiCallback callback) override;
|
|
|
|
private:
|
|
String root_path;
|
|
ACLs default_acls;
|
|
|
|
Poco::Timespan session_timeout;
|
|
Poco::Timespan operation_timeout;
|
|
|
|
Poco::Net::StreamSocket socket;
|
|
std::optional<ReadBufferFromPocoSocket> in;
|
|
std::optional<WriteBufferFromPocoSocket> out;
|
|
|
|
int64_t session_id = 0;
|
|
|
|
std::atomic<XID> next_xid {1};
|
|
std::atomic<bool> expired {false};
|
|
std::mutex push_request_mutex;
|
|
|
|
using clock = std::chrono::steady_clock;
|
|
|
|
struct RequestInfo
|
|
{
|
|
std::shared_ptr<ZooKeeperRequest> request;
|
|
ResponseCallback callback;
|
|
WatchCallback watch;
|
|
clock::time_point time;
|
|
};
|
|
|
|
using RequestsQueue = ConcurrentBoundedQueue<RequestInfo>;
|
|
|
|
RequestsQueue requests_queue{1};
|
|
void pushRequest(RequestInfo && info);
|
|
|
|
using Operations = std::map<XID, RequestInfo>;
|
|
|
|
Operations operations;
|
|
std::mutex operations_mutex;
|
|
|
|
using WatchCallbacks = std::vector<WatchCallback>;
|
|
using Watches = std::map<String /* path, relative of root_path */, WatchCallbacks>;
|
|
|
|
Watches watches;
|
|
std::mutex watches_mutex;
|
|
|
|
ThreadFromGlobalPool send_thread;
|
|
ThreadFromGlobalPool receive_thread;
|
|
|
|
void connect(
|
|
const Addresses & addresses,
|
|
Poco::Timespan connection_timeout);
|
|
|
|
void sendHandshake();
|
|
void receiveHandshake();
|
|
|
|
void sendAuth(const String & scheme, const String & data);
|
|
|
|
void receiveEvent();
|
|
|
|
void sendThread();
|
|
void receiveThread();
|
|
|
|
void close();
|
|
|
|
/// Call all remaining callbacks and watches, passing errors to them.
|
|
void finalize(bool error_send, bool error_receive);
|
|
|
|
template <typename T>
|
|
void write(const T &);
|
|
|
|
template <typename T>
|
|
void read(T &);
|
|
|
|
CurrentMetrics::Increment active_session_metric_increment{CurrentMetrics::ZooKeeperSession};
|
|
};
|
|
|
|
struct ZooKeeperResponse;
|
|
using ZooKeeperResponsePtr = std::shared_ptr<ZooKeeperResponse>;
|
|
|
|
/// Exposed in header file for Yandex.Metrica code.
|
|
struct ZooKeeperRequest : virtual Request
|
|
{
|
|
ZooKeeper::XID xid = 0;
|
|
bool has_watch = false;
|
|
/// If the request was not send and the error happens, we definitely sure, that is has not been processed by the server.
|
|
/// If the request was sent and we didn't get the response and the error happens, then we cannot be sure was it processed or not.
|
|
bool probably_sent = false;
|
|
|
|
ZooKeeperRequest() = default;
|
|
ZooKeeperRequest(const ZooKeeperRequest &) = default;
|
|
virtual ~ZooKeeperRequest() = default;
|
|
|
|
virtual ZooKeeper::OpNum getOpNum() const = 0;
|
|
|
|
/// Writes length, xid, op_num, then the rest.
|
|
void write(WriteBuffer & out) const;
|
|
|
|
virtual void writeImpl(WriteBuffer &) const = 0;
|
|
|
|
virtual ZooKeeperResponsePtr makeResponse() const = 0;
|
|
};
|
|
|
|
|
|
}
|