mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
add fault injection in ZooKeeper client
This commit is contained in:
parent
6e296d0342
commit
27ca943863
@ -34,6 +34,7 @@ dpkg -i package_folder/clickhouse-test_*.deb
|
||||
|
||||
function configure()
|
||||
{
|
||||
export ZOOKEEPER_FAULT_INJECTION=1
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
|
@ -31,9 +31,13 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c
|
||||
{
|
||||
connection_timeout_ms = config.getInt(config_name + "." + key);
|
||||
}
|
||||
else if (key == "session_fault_probability")
|
||||
else if (key == "send_fault_probability")
|
||||
{
|
||||
session_fault_probability = config.getDouble(config_name + "." + key);
|
||||
send_fault_probability = config.getDouble(config_name + "." + key);
|
||||
}
|
||||
else if (key == "recv_fault_probability")
|
||||
{
|
||||
recv_fault_probability = config.getDouble(config_name + "." + key);
|
||||
}
|
||||
else if (key == "identity")
|
||||
{
|
||||
|
@ -23,7 +23,7 @@ struct ZooKeeperArgs
|
||||
bool operator == (const ZooKeeperArgs &) const = default;
|
||||
bool operator != (const ZooKeeperArgs &) const = default;
|
||||
|
||||
String implementation;
|
||||
String implementation = "zookeeper";
|
||||
Strings hosts;
|
||||
String auth_scheme;
|
||||
String identity;
|
||||
@ -31,7 +31,8 @@ struct ZooKeeperArgs
|
||||
int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS;
|
||||
int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS;
|
||||
int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS;
|
||||
float session_fault_probability = 0;
|
||||
float send_fault_probability = 0;
|
||||
float recv_fault_probability = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -336,6 +336,18 @@ ZooKeeper::ZooKeeper(
|
||||
default_acls.emplace_back(std::move(acl));
|
||||
}
|
||||
|
||||
|
||||
/// It makes sense (especially for async requests) to inject a fault in two places:
|
||||
/// pushRequest (before the request is sent) and receiveEvent (after the request was executed).
|
||||
if (0 < args.send_fault_probability && args.send_fault_probability <= 1)
|
||||
{
|
||||
send_inject_fault.emplace(args.send_fault_probability);
|
||||
}
|
||||
if (0 < args.recv_fault_probability && args.recv_fault_probability <= 1)
|
||||
{
|
||||
recv_inject_fault.emplace(args.recv_fault_probability);
|
||||
}
|
||||
|
||||
connect(nodes, args.connection_timeout_ms * 1000);
|
||||
|
||||
if (!args.auth_scheme.empty())
|
||||
@ -683,6 +695,9 @@ void ZooKeeper::receiveEvent()
|
||||
RequestInfo request_info;
|
||||
ZooKeeperResponsePtr response;
|
||||
|
||||
if (unlikely(recv_inject_fault) && recv_inject_fault.value()(thread_local_rng))
|
||||
throw Exception("Session expired (fault injected)", Error::ZSESSIONEXPIRED);
|
||||
|
||||
if (xid == PING_XID)
|
||||
{
|
||||
if (err != Error::ZOK)
|
||||
@ -1019,6 +1034,9 @@ void ZooKeeper::pushRequest(RequestInfo && info)
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(send_inject_fault) && send_inject_fault.value()(thread_local_rng))
|
||||
throw Exception("Session expired (fault injected)", Error::ZSESSIONEXPIRED);
|
||||
|
||||
if (!requests_queue.tryPush(std::move(info), args.operation_timeout_ms))
|
||||
{
|
||||
if (requests_queue.isFinished())
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <functional>
|
||||
#include <random>
|
||||
|
||||
|
||||
/** ZooKeeper C++ library, a replacement for libzookeeper.
|
||||
@ -192,6 +193,8 @@ private:
|
||||
|
||||
zkutil::ZooKeeperArgs args;
|
||||
|
||||
std::optional<std::bernoulli_distribution> send_inject_fault;
|
||||
std::optional<std::bernoulli_distribution> recv_inject_fault;
|
||||
|
||||
Poco::Net::StreamSocket socket;
|
||||
/// To avoid excessive getpeername(2) calls.
|
||||
|
19
tests/config/config.d/zookeeper_fault_injection.xml
Normal file
19
tests/config/config.d/zookeeper_fault_injection.xml
Normal file
@ -0,0 +1,19 @@
|
||||
<clickhouse>
|
||||
<zookeeper>
|
||||
<node index="1">
|
||||
<host>localhost</host>
|
||||
<port>9181</port>
|
||||
</node>
|
||||
|
||||
<!-- Settings for fault injection.
|
||||
Approximate probability of request success:
|
||||
(1 - send_fault_probability) * (1 - recv_fault_probability) = 0.99998 * 0.99998 = 0.99996
|
||||
Actually it will be less, because if some request fails due to fault injection,
|
||||
then all requests that are currently in the queue also fail.
|
||||
In other words, the session will expire about 4 times per 99996 successful requests,
|
||||
or approximately once every 25000 requests (on average).
|
||||
-->
|
||||
<send_fault_probability>0.00002</send_fault_probability>
|
||||
<recv_fault_probability>0.00002</recv_fault_probability>
|
||||
</zookeeper>
|
||||
</clickhouse>
|
@ -15,7 +15,6 @@ mkdir -p $DEST_SERVER_PATH/config.d/
|
||||
mkdir -p $DEST_SERVER_PATH/users.d/
|
||||
mkdir -p $DEST_CLIENT_PATH
|
||||
|
||||
ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/
|
||||
ln -sf $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/
|
||||
ln -sf $SRC_PATH/config.d/part_log.xml $DEST_SERVER_PATH/config.d/
|
||||
ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/
|
||||
@ -72,6 +71,13 @@ ln -sf $SRC_PATH/dhparam.pem $DEST_SERVER_PATH/
|
||||
ln -sf --backup=simple --suffix=_original.xml \
|
||||
$SRC_PATH/config.d/query_masking_rules.xml $DEST_SERVER_PATH/config.d/
|
||||
|
||||
|
||||
if [[ -n "$ZOOKEEPER_FAULT_INJECTION" ]] && [[ "$ZOOKEEPER_FAULT_INJECTION" -eq 1 ]]; then
|
||||
ln -sf $SRC_PATH/config.d/zookeeper_fault_injection.xml $DEST_SERVER_PATH/config.d/
|
||||
else
|
||||
ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/
|
||||
fi
|
||||
|
||||
if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then
|
||||
ln -sf $SRC_PATH/config.d/polymorphic_parts.xml $DEST_SERVER_PATH/config.d/
|
||||
fi
|
||||
|
Loading…
Reference in New Issue
Block a user