Add fault injection for thread allocation in thread pools

This commit is contained in:
Alexander Tokmakov 2024-04-03 19:49:23 +02:00
parent f8ef9fc5d3
commit 98ac8031e0
7 changed files with 60 additions and 0 deletions

View File

@ -215,6 +215,7 @@ stop_server
export USE_S3_STORAGE_FOR_MERGE_TREE=1
export RANDOMIZE_OBJECT_KEY_TYPE=1
export ZOOKEEPER_FAULT_INJECTION=1
export THREAD_POOL_FAULT_INJECTION=1
configure
# But we still need default disk because some tables loaded only into it

View File

@ -1569,6 +1569,8 @@ try
new_server_settings.http_connections_store_limit,
});
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability);
ProfileEvents::increment(ProfileEvents::MainConfigLoads);
/// Must be the last.
@ -2058,6 +2060,8 @@ try
startup_watch.stop();
ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds());
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability);
try
{
global_context->startClusterDiscovery();

View File

@ -202,6 +202,9 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, Priority priority, std:
/// Check if there are enough threads to process job.
if (threads.size() < std::min(max_threads, scheduled_jobs + 1))
{
if (CannotAllocateThreadFaultInjector::injectFault())
return on_error("fault injected");
try
{
threads.emplace_front();
@ -541,3 +544,30 @@ void GlobalThreadPool::shutdown()
the_instance->finalize();
}
}
/// Returns the single process-wide injector object; it is constructed on the first call.
CannotAllocateThreadFaultInjector & CannotAllocateThreadFaultInjector::instance()
{
    static CannotAllocateThreadFaultInjector injector;
    return injector;
}
/// Reconfigures the injector.
/// A probability in the half-open range (0, 1] turns injection on with that probability;
/// any other value (zero, negative, above one, NaN) turns it off.
void CannotAllocateThreadFaultInjector::setFaultProbability(double probability)
{
    auto & injector = instance();
    std::lock_guard guard(injector.mutex);

    const bool should_enable = probability > 0 && probability <= 1;
    injector.enabled = should_enable;

    if (!should_enable)
    {
        injector.random.reset();
        return;
    }

    injector.random.emplace(probability);
}
/// Decides whether the caller should simulate a failure right now.
/// Returns false immediately while injection is disabled; otherwise draws one sample
/// from the configured Bernoulli distribution.
bool CannotAllocateThreadFaultInjector::injectFault()
{
    auto & injector = instance();

    /// Cheap check first, so the mutex is not touched while injection is switched off.
    if (!injector.enabled.load(std::memory_order_relaxed))
        return false;

    std::lock_guard guard(injector.mutex);

    /// The distribution may have been reset between the flag check and taking the lock.
    if (!injector.random)
        return false;

    return (*injector.random)(injector.rndgen);
}

View File

@ -10,8 +10,10 @@
#include <optional>
#include <atomic>
#include <stack>
#include <random>
#include <boost/heap/priority_queue.hpp>
#include <pcg_random.hpp>
#include <Poco/Event.h>
#include <Common/ThreadStatus.h>
@ -324,3 +326,16 @@ using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl<true>;
/// To make sure the tracing context is correctly propagated, we explicitly disable context propagation(including initialization and de-initialization) at underlying worker level.
///
using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPoolNoTracingContextPropagation>;
/// Global switch for fault injection in all thread pools.
/// The object is a singleton reachable only through the static methods below.
class CannotAllocateThreadFaultInjector
{
public:
    /// A probability in (0, 1] enables injection; any other value disables it.
    static void setFaultProbability(double probability);

    /// Returns true when the caller should simulate a failure; always false while disabled.
    static bool injectFault();

private:
    static CannotAllocateThreadFaultInjector & instance();

    /// Checked without the mutex to skip locking while injection is off.
    std::atomic_bool enabled{false};
    /// Guards `rndgen` and `random`.
    std::mutex mutex;
    pcg64_fast rndgen;
    /// Holds a distribution only while injection is enabled.
    std::optional<std::bernoulli_distribution> random;
};

View File

@ -41,6 +41,7 @@ namespace DB
M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \
M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \
M(Int32, max_connections, 1024, "Max server connections.", 0) \
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \

View File

@ -0,0 +1,3 @@
<clickhouse>
<!-- Test-only setting: the server passes this value to CannotAllocateThreadFaultInjector::setFaultProbability,
     so thread pools report an injected fault instead of starting a new thread with probability 0.01. -->
<cannot_allocate_thread_fault_injection_probability>0.01</cannot_allocate_thread_fault_injection_probability>
</clickhouse>

View File

@ -132,6 +132,12 @@ else
ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/
fi
# Install the thread-allocation fault-injection config only when THREAD_POOL_FAULT_INJECTION is 1;
# otherwise make sure a previously installed copy is removed.
if [[ -n "${THREAD_POOL_FAULT_INJECTION}" && "${THREAD_POOL_FAULT_INJECTION}" -eq 1 ]]; then
    ln -sf ${SRC_PATH}/config.d/cannot_allocate_thread_injection.xml ${DEST_SERVER_PATH}/config.d/
else
    rm -f ${DEST_SERVER_PATH}/config.d/cannot_allocate_thread_injection.xml ||:
fi
# We randomize creating the snapshot on exit for Keeper to test out using older snapshots
value=$(($RANDOM % 2))
sed --follow-symlinks -i "s|<create_snapshot_on_exit>[01]</create_snapshot_on_exit>|<create_snapshot_on_exit>$value</create_snapshot_on_exit>|" $DEST_SERVER_PATH/config.d/keeper_port.xml