2020-10-10 18:37:02 +00:00
|
|
|
#pragma once
|
2020-03-01 21:56:17 +00:00
|
|
|
#include <cstdint>
|
2021-09-20 14:23:10 +00:00
|
|
|
#include <atomic>
|
2020-03-01 21:56:17 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/** Allows to randomize thread scheduling and insert various glitches across whole program for testing purposes.
|
|
|
|
* It is done by setting up a timer that will send PROF signal to every thread when certain amount of CPU time has passed.
|
|
|
|
*
|
|
|
|
* To initialize ThreadFuzzer, call ThreadFuzzer::instance().
|
|
|
|
* The behaviour is controlled by environment variables:
|
|
|
|
*
|
|
|
|
* THREAD_FUZZER_CPU_TIME_PERIOD_US - period of signals in microseconds.
|
|
|
|
* THREAD_FUZZER_YIELD_PROBABILITY - probability to do 'sched_yield'.
|
|
|
|
* THREAD_FUZZER_MIGRATE_PROBABILITY - probability to set CPU affinity to random CPU core.
|
|
|
|
* THREAD_FUZZER_SLEEP_PROBABILITY - probability to sleep.
|
|
|
|
* THREAD_FUZZER_SLEEP_TIME_US - amount of time to sleep in microseconds.
|
|
|
|
*
|
|
|
|
* ThreadFuzzer will do nothing if environment variables are not set accordingly.
|
|
|
|
*
|
|
|
|
* The intention is to reproduce thread synchronization bugs (race conditions and deadlocks) more frequently in tests.
|
|
|
|
* We already have tests with TSan. But TSan only covers "physical" synchronization bugs, but not "logical" ones,
|
|
|
|
* where all data is protected by synchronization primitives, but we still have race conditions.
|
|
|
|
* Obviously, TSan cannot debug distributed synchronization bugs.
|
|
|
|
*
|
|
|
|
* The motivation for this tool is an evidence, that concurrency bugs are more likely to reproduce
|
|
|
|
* on bad unstable virtual machines in a dirty environments.
|
|
|
|
*
|
|
|
|
* The idea is not new, see also:
|
|
|
|
* https://channel9.msdn.com/blogs/peli/concurrency-fuzzing-with-cuzz
|
|
|
|
*
|
|
|
|
* Notes:
|
|
|
|
* - it can be also implemented with instrumentation (example: LLVM Xray) instead of signals.
|
|
|
|
* - we should also make the sleep time random.
|
2020-09-17 18:04:11 +00:00
|
|
|
* - sleep and migration obviously helps, but the effect of yield is unclear.
|
2020-03-12 19:08:25 +00:00
|
|
|
*
|
|
|
|
* In addition, we allow to inject glitches around thread synchronization functions.
|
|
|
|
* Example:
|
|
|
|
*
|
|
|
|
* THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
|
|
|
|
* THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
|
|
|
|
* THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
|
|
|
|
* THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
|
2020-03-01 21:56:17 +00:00
|
|
|
*/
|
|
|
|
class ThreadFuzzer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static ThreadFuzzer & instance()
|
|
|
|
{
|
|
|
|
static ThreadFuzzer res;
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2020-03-12 19:08:25 +00:00
|
|
|
bool isEffective() const;
|
2020-03-01 21:56:17 +00:00
|
|
|
|
2021-09-20 14:23:10 +00:00
|
|
|
static void stop();
|
|
|
|
static void start();
|
|
|
|
static bool isStarted();
|
|
|
|
|
2020-03-01 21:56:17 +00:00
|
|
|
private:
|
|
|
|
uint64_t cpu_time_period_us = 0;
|
|
|
|
double yield_probability = 0;
|
|
|
|
double migrate_probability = 0;
|
|
|
|
double sleep_probability = 0;
|
2020-03-12 19:08:25 +00:00
|
|
|
double sleep_time_us = 0;
|
2020-03-01 21:56:17 +00:00
|
|
|
|
2021-09-20 14:23:10 +00:00
|
|
|
inline static std::atomic<bool> started{true};
|
|
|
|
|
2020-03-01 21:56:17 +00:00
|
|
|
ThreadFuzzer();
|
|
|
|
|
|
|
|
void initConfiguration();
|
2020-04-22 05:39:31 +00:00
|
|
|
void setup() const;
|
2020-03-01 21:56:17 +00:00
|
|
|
|
|
|
|
static void signalHandler(int);
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|