mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
add fault injection
This commit is contained in:
parent
f8ef9fc5d3
commit
98ac8031e0
@ -215,6 +215,7 @@ stop_server
|
||||
export USE_S3_STORAGE_FOR_MERGE_TREE=1
|
||||
export RANDOMIZE_OBJECT_KEY_TYPE=1
|
||||
export ZOOKEEPER_FAULT_INJECTION=1
|
||||
export THREAD_POOL_FAULT_INJECTION=1
|
||||
configure
|
||||
|
||||
# But we still need default disk because some tables loaded only into it
|
||||
|
@ -1569,6 +1569,8 @@ try
|
||||
new_server_settings.http_connections_store_limit,
|
||||
});
|
||||
|
||||
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::MainConfigLoads);
|
||||
|
||||
/// Must be the last.
|
||||
@ -2058,6 +2060,8 @@ try
|
||||
startup_watch.stop();
|
||||
ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds());
|
||||
|
||||
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability);
|
||||
|
||||
try
|
||||
{
|
||||
global_context->startClusterDiscovery();
|
||||
|
@ -202,6 +202,9 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, Priority priority, std:
|
||||
/// Check if there are enough threads to process job.
|
||||
if (threads.size() < std::min(max_threads, scheduled_jobs + 1))
|
||||
{
|
||||
if (CannotAllocateThreadFaultInjector::injectFault())
|
||||
return on_error("fault injected");
|
||||
|
||||
try
|
||||
{
|
||||
threads.emplace_front();
|
||||
@ -541,3 +544,30 @@ void GlobalThreadPool::shutdown()
|
||||
the_instance->finalize();
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the one shared injector object. It is a function-local static,
/// so it is constructed the first time this accessor is called.
CannotAllocateThreadFaultInjector & CannotAllocateThreadFaultInjector::instance()
{
    static CannotAllocateThreadFaultInjector injector;
    return injector;
}
|
||||
|
||||
void CannotAllocateThreadFaultInjector::setFaultProbability(double probability)
|
||||
{
|
||||
auto & ins = instance();
|
||||
std::lock_guard lock(ins.mutex);
|
||||
ins.enabled = 0 < probability && probability <= 1;
|
||||
if (ins.enabled)
|
||||
ins.random.emplace(probability);
|
||||
else
|
||||
ins.random.reset();
|
||||
}
|
||||
|
||||
bool CannotAllocateThreadFaultInjector::injectFault()
|
||||
{
|
||||
auto & ins = instance();
|
||||
if (!ins.enabled.load(std::memory_order_relaxed))
|
||||
return false;
|
||||
|
||||
std::lock_guard lock(ins.mutex);
|
||||
return ins.random && (*ins.random)(ins.rndgen);
|
||||
}
|
||||
|
@ -10,8 +10,10 @@
|
||||
#include <optional>
|
||||
#include <atomic>
|
||||
#include <stack>
|
||||
#include <random>
|
||||
|
||||
#include <boost/heap/priority_queue.hpp>
|
||||
#include <pcg_random.hpp>
|
||||
|
||||
#include <Poco/Event.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
@ -324,3 +326,16 @@ using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl<true>;
|
||||
/// To make sure the tracing context is correctly propagated, we explicitly disable context propagation(including initialization and de-initialization) at underlying worker level.
|
||||
///
|
||||
using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPoolNoTracingContextPropagation>;
|
||||
|
||||
/// Enables fault injections globally for all thread pools
|
||||
class CannotAllocateThreadFaultInjector
|
||||
{
|
||||
std::atomic_bool enabled = false;
|
||||
std::mutex mutex;
|
||||
pcg64_fast rndgen;
|
||||
std::optional<std::bernoulli_distribution> random;
|
||||
static CannotAllocateThreadFaultInjector & instance();
|
||||
public:
|
||||
static void setFaultProbability(double probability);
|
||||
static bool injectFault();
|
||||
};
|
||||
|
@ -41,6 +41,7 @@ namespace DB
|
||||
M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
|
||||
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
|
||||
M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \
|
||||
M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \
|
||||
M(Int32, max_connections, 1024, "Max server connections.", 0) \
|
||||
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
|
||||
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
|
||||
|
@ -0,0 +1,3 @@
|
||||
<clickhouse>
|
||||
<cannot_allocate_thread_fault_injection_probability>0.01</cannot_allocate_thread_fault_injection_probability>
|
||||
</clickhouse>
|
@ -132,6 +132,12 @@ else
|
||||
ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/
|
||||
fi
|
||||
|
||||
# Link the thread-pool fault injection config when THREAD_POOL_FAULT_INJECTION is set to 1;
# otherwise remove any link left in the destination config.d directory.
if [[ -n "$THREAD_POOL_FAULT_INJECTION" && "$THREAD_POOL_FAULT_INJECTION" -eq 1 ]]; then
    ln -sf $SRC_PATH/config.d/cannot_allocate_thread_injection.xml $DEST_SERVER_PATH/config.d/
else
    rm -f $DEST_SERVER_PATH/config.d/cannot_allocate_thread_injection.xml ||:
fi
|
||||
|
||||
# We randomize creating the snapshot on exit for Keeper to test out using older snapshots
|
||||
value=$(($RANDOM % 2))
|
||||
sed --follow-symlinks -i "s|<create_snapshot_on_exit>[01]</create_snapshot_on_exit>|<create_snapshot_on_exit>$value</create_snapshot_on_exit>|" $DEST_SERVER_PATH/config.d/keeper_port.xml
|
||||
|
Loading…
Reference in New Issue
Block a user