Fix QueryProfiler (query_profiler_{cpu,real}_time_period_ns) reset

Even after timer_delete() the signal can still be delivered, because timer_delete() does nothing about signals that are already pending.
Reproducer:

    $ clickhouse-server & # with configured trace_log
    $ clickhouse-benchmark -c2 --query 'select * from numbers(1e6)' --query_profiler_cpu_time_period_ns=1 &
    ...
    2021.12.02 14:28:01.320288 [ 24885 ] {} <Debug> TCPHandler: Processed in 177.055205644 sec.
    User defined signal 2

CI failures:
- https://s3.amazonaws.com/clickhouse-test-reports/32067/8dbc7a8dae17090a18778f29629d8746a1bb9b72/stateful_tests__debug__actions_.html
- https://s3.amazonaws.com/clickhouse-test-reports/32064/c07450a7dce363b7a4c5ca3ab0e833c25e3d46c0/stateful_tests__debug__actions_.html

Fix this by not restoring the previous signal handler, and by introducing
a flag so that signals delivered after the timer is disabled are ignored.

Fixes: #31740
This commit is contained in:
Azat Khuzhin 2021-12-03 01:32:58 +03:00
parent 96bd83c31e
commit 0b2de32228
2 changed files with 22 additions and 10 deletions

View File

@ -25,6 +25,12 @@ namespace
{
#if defined(OS_LINUX)
thread_local size_t write_trace_iteration = 0;
/// Even after timer_delete() the signal can be delivered,
/// since it does not do anything with pending signals.
///
/// This flag exists to work around that:
/// it makes the handler ignore signals delivered after timer_delete().
thread_local bool signal_handler_disarmed = true;
#endif
void writeTraceInfo(TraceType trace_type, int /* sig */, siginfo_t * info, void * context)
@ -117,10 +123,8 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
if (sigaddset(&sa.sa_mask, pause_signal))
throwFromErrno("Failed to add signal to mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
struct sigaction local_previous_handler;
if (sigaction(pause_signal, &sa, &local_previous_handler))
if (sigaction(pause_signal, &sa, nullptr))
throwFromErrno("Failed to setup signal handler for query profiler", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
previous_handler.emplace(local_previous_handler);
try
{
@ -160,6 +164,8 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset};
if (timer_settime(*timer_id, 0, &timer_spec, nullptr))
throwFromErrno("Failed to set thread timer period", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
signal_handler_disarmed = false;
}
catch (...)
{
@ -179,11 +185,14 @@ template <typename ProfilerImpl>
void QueryProfilerBase<ProfilerImpl>::tryCleanup()
{
#if USE_UNWIND
if (timer_id.has_value() && timer_delete(*timer_id))
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString(ErrorCodes::CANNOT_DELETE_TIMER));
if (timer_id.has_value())
{
if (timer_delete(*timer_id))
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString(ErrorCodes::CANNOT_DELETE_TIMER));
timer_id.reset();
}
if (previous_handler.has_value() && sigaction(pause_signal, &*previous_handler, nullptr))
LOG_ERROR(log, "Failed to restore signal handler after query profiler {}", errnoToString(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER));
signal_handler_disarmed = true;
#endif
}
@ -196,6 +205,9 @@ QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period
/// Signal handler of the real-clock query profiler.
/// Does nothing while the thread-local signal_handler_disarmed flag is set;
/// otherwise forwards the signal arguments to writeTraceInfo() with TraceType::Real.
void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
{
/// A signal may still arrive after the timer has been deleted; ignore it in that case.
if (signal_handler_disarmed)
return;
/// NOTE(review): presumably forbids memory allocations for the rest of this scope - confirm against the macro definition.
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::Real, sig, info, context);
}
@ -206,6 +218,9 @@ QueryProfilerCPU::QueryProfilerCPU(const UInt64 thread_id, const UInt32 period)
/// Signal handler of the CPU-clock query profiler.
/// Does nothing while the thread-local signal_handler_disarmed flag is set;
/// otherwise forwards the signal arguments to writeTraceInfo() with TraceType::CPU.
void QueryProfilerCPU::signalHandler(int sig, siginfo_t * info, void * context)
{
/// A signal may still arrive after the timer has been deleted; ignore it in that case.
if (signal_handler_disarmed)
return;
/// NOTE(review): presumably forbids memory allocations for the rest of this scope - confirm against the macro definition.
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::CPU, sig, info, context);
}

View File

@ -46,9 +46,6 @@ private:
/// Pause signal to interrupt threads to get traces
int pause_signal;
/// Previous signal handler to restore after query profiler exits
std::optional<struct sigaction> previous_handler;
};
/// Query profiler with timer based on real clock