2019-12-23 18:56:57 +00:00
|
|
|
#ifdef OS_LINUX /// Because of 'sigqueue' functions and RT signals.
|
|
|
|
|
2019-12-22 17:20:33 +00:00
|
|
|
#include <signal.h>
#include <poll.h>

#include <atomic>
#include <mutex>
#include <filesystem>

#include <ext/scope_guard.h>

#include <Storages/System/StorageSystemStackTrace.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <IO/ReadHelpers.h>
#include <Common/PipeFDs.h>
#include <common/getThreadNumber.h>
|
2019-12-22 17:20:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int CANNOT_SIGQUEUE;
|
2019-12-22 20:17:16 +00:00
|
|
|
extern const int CANNOT_MANIPULATE_SIGSET;
|
|
|
|
extern const int CANNOT_SET_SIGNAL_HANDLER;
|
|
|
|
extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
|
|
|
|
extern const int LOGICAL_ERROR;
|
2019-12-22 17:20:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
2019-12-22 20:17:16 +00:00
|
|
|
const pid_t expected_pid = getpid();
|
|
|
|
const int sig = SIGRTMIN;
|
2019-12-22 20:37:29 +00:00
|
|
|
|
2019-12-23 19:23:54 +00:00
|
|
|
int sequence_num = 0; /// For messages sent via pipe.
|
|
|
|
|
2019-12-22 20:17:16 +00:00
|
|
|
UInt32 thread_number{0};
|
|
|
|
std::optional<StackTrace> stack_trace;
|
2019-12-22 20:37:29 +00:00
|
|
|
|
|
|
|
static constexpr size_t max_query_id_size = 128;
|
|
|
|
char query_id_data[max_query_id_size];
|
|
|
|
size_t query_id_size = 0;
|
|
|
|
|
2019-12-22 20:17:16 +00:00
|
|
|
LazyPipeFDs notification_pipe;
|
2019-12-22 17:20:33 +00:00
|
|
|
|
2019-12-22 20:17:16 +00:00
|
|
|
void signalHandler(int, siginfo_t * info, void * context)
|
|
|
|
{
|
|
|
|
/// In case malicious user is sending signals manually (for unknown reason).
|
|
|
|
/// If we don't check - it may break our synchronization.
|
|
|
|
if (info->si_pid != expected_pid)
|
|
|
|
return;
|
2019-12-22 17:20:33 +00:00
|
|
|
|
2019-12-23 19:23:54 +00:00
|
|
|
/// Signal received too late.
|
|
|
|
if (info->si_value.sival_int != sequence_num)
|
|
|
|
return;
|
|
|
|
|
2019-12-22 20:17:16 +00:00
|
|
|
/// All these methods are signal-safe.
|
|
|
|
const ucontext_t signal_context = *reinterpret_cast<ucontext_t *>(context);
|
|
|
|
stack_trace.emplace(signal_context);
|
|
|
|
thread_number = getThreadNumber();
|
2019-12-22 17:20:33 +00:00
|
|
|
|
2019-12-22 20:37:29 +00:00
|
|
|
StringRef query_id = CurrentThread::getQueryId();
|
|
|
|
query_id_size = std::min(query_id.size, max_query_id_size);
|
|
|
|
memcpy(query_id_data, query_id.data, query_id_size);
|
|
|
|
|
2019-12-23 19:23:54 +00:00
|
|
|
int notification_num = info->si_value.sival_int;
|
|
|
|
ssize_t res = ::write(notification_pipe.fds_rw[1], ¬ification_num, sizeof(notification_num));
|
2019-12-23 18:58:42 +00:00
|
|
|
|
2019-12-22 20:17:16 +00:00
|
|
|
/// We cannot do anything if write failed.
|
2019-12-23 18:58:42 +00:00
|
|
|
(void)res;
|
2019-12-22 20:17:16 +00:00
|
|
|
}
|
2019-12-22 17:20:33 +00:00
|
|
|
|
2019-12-22 20:37:29 +00:00
|
|
|
/// Wait for data in pipe and read it.
|
2019-12-22 20:17:16 +00:00
|
|
|
bool wait(int timeout_ms)
|
|
|
|
{
|
|
|
|
while (true)
|
2019-12-22 17:20:33 +00:00
|
|
|
{
|
2019-12-22 20:17:16 +00:00
|
|
|
int fd = notification_pipe.fds_rw[0];
|
|
|
|
pollfd poll_fd{fd, POLLIN, 0};
|
|
|
|
|
|
|
|
int poll_res = poll(&poll_fd, 1, timeout_ms);
|
|
|
|
if (poll_res < 0)
|
|
|
|
{
|
|
|
|
if (errno == EINTR)
|
|
|
|
{
|
|
|
|
--timeout_ms; /// Quite a hacky way to update timeout. Just to make sure we avoid infinite waiting.
|
|
|
|
if (timeout_ms == 0)
|
|
|
|
return false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
throwFromErrno("Cannot poll pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
|
|
|
|
}
|
|
|
|
if (poll_res == 0)
|
|
|
|
return false;
|
|
|
|
|
2019-12-23 19:23:54 +00:00
|
|
|
int notification_num = 0;
|
|
|
|
ssize_t read_res = ::read(fd, ¬ification_num, sizeof(notification_num));
|
2019-12-22 20:17:16 +00:00
|
|
|
|
|
|
|
if (read_res < 0)
|
|
|
|
{
|
|
|
|
if (errno == EINTR)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
|
|
|
|
}
|
|
|
|
|
2019-12-23 19:23:54 +00:00
|
|
|
if (read_res == sizeof(notification_num))
|
|
|
|
{
|
|
|
|
if (notification_num == sequence_num)
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
continue; /// Drain delayed notifications.
|
|
|
|
}
|
|
|
|
|
|
|
|
throw Exception("Logical error: read wrong number of bytes from pipe", ErrorCodes::LOGICAL_ERROR);
|
2019-12-22 17:20:33 +00:00
|
|
|
}
|
2019-12-22 20:17:16 +00:00
|
|
|
}
|
|
|
|
}
|
2019-12-22 17:20:33 +00:00
|
|
|
|
|
|
|
|
2019-12-23 16:49:06 +00:00
|
|
|
/// Opens the notification pipe and installs signalHandler as the handler of the stack-trace request signal.
/// Throws if the signal set cannot be prepared or the handler cannot be installed.
StorageSystemStackTrace::StorageSystemStackTrace(const String & name_)
    : IStorageSystemOneBlock<StorageSystemStackTrace>(name_)
{
    /// The handler writes into this pipe, so it is opened before the handler is installed.
    notification_pipe.open();

    /// SA_SIGINFO selects the three-argument handler form that receives siginfo_t and the thread context.
    struct sigaction handler_action{};
    handler_action.sa_flags = SA_SIGINFO;
    handler_action.sa_sigaction = signalHandler;

    /// The mask consists of our signal only.
    if (0 != sigemptyset(&handler_action.sa_mask))
        throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_MANIPULATE_SIGSET);

    if (0 != sigaddset(&handler_action.sa_mask, sig))
        throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_MANIPULATE_SIGSET);

    /// The previous disposition of the signal is not needed, hence nullptr.
    if (0 != sigaction(sig, &handler_action, nullptr))
        throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
}
|
2019-12-22 17:20:33 +00:00
|
|
|
|
|
|
|
|
2019-12-22 20:17:16 +00:00
|
|
|
/// Describes the columns of the table:
/// thread_number (UInt32), query_id (String) and trace (Array(UInt64)).
NamesAndTypesList StorageSystemStackTrace::getNamesAndTypes()
{
    const auto thread_number_type = std::make_shared<DataTypeUInt32>();
    const auto query_id_type = std::make_shared<DataTypeString>();
    const auto trace_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());

    NamesAndTypesList columns
    {
        { "thread_number", thread_number_type },
        { "query_id", query_id_type },
        { "trace", trace_type }
    };

    return columns;
}
|
|
|
|
|
2019-12-22 20:17:16 +00:00
|
|
|
|
2019-12-22 17:20:33 +00:00
|
|
|
/// Produces one row per entry of /proc/self/task: the thread is asked for its stack trace with a signal,
/// and the answer (or a placeholder row, if no answer arrived in time) is appended to res_columns.
/// Throws if the signal cannot be sent for a reason other than the thread having disappeared,
/// or if waiting for the answer fails.
void StorageSystemStackTrace::fillData(MutableColumns & res_columns, const Context &, const SelectQueryInfo &) const
{
    /// Concurrent reads from this table are serialized: the request state is kept in shared variables.
    std::lock_guard lock(mutex);

    /// Threads are queried strictly one at a time: a signal is sent, then the answer is awaited.
    /// The OS limits the number of queued signals, so sending everything at once could lose some of them.
    /// (Non-RT signals would additionally be dropped while a previous one is being handled; we use RT signals.)
    ///
    /// As a consequence, the traces of different threads are not taken at the same moment.
    ///
    /// procfs is the only practical way to list the threads of the process.

    for (const auto & task_entry : std::filesystem::directory_iterator("/proc/self/task"))
    {
        pid_t tid = parse<pid_t>(task_entry.path().filename());

        /// The request number is carried as the signal payload; the handler echoes it back through the pipe.
        sigval request_value{};
        request_value.sival_int = sequence_num;

        /// NOTE(review): sigqueue is documented as sending a signal to a process - confirm that delivery
        /// to exactly this thread is guaranteed here (compare with rt_tgsigqueueinfo).
        if (0 != ::sigqueue(tid, sig, request_value))
        {
            /// ESRCH: the thread may have already finished. Anything else is a real error.
            if (ESRCH != errno)
                throwFromErrno("Cannot send signal with sigqueue", ErrorCodes::CANNOT_SIGQUEUE);

            continue;
        }

        /// The wait is bounded by a timeout, just in case the signal does not get processed.
        const bool answer_received = wait(100);

        if (!answer_received)
        {
            /// Cannot obtain a stack trace. But create a record in result nevertheless.
            res_columns[0]->insert(tid);    /// TODO Replace all thread numbers to OS thread numbers.
            res_columns[1]->insertDefault();
            res_columns[2]->insertDefault();
        }
        else
        {
            /// Only the frames in [offset, size) are exported.
            const size_t frames_end = stack_trace->getSize();
            const size_t frames_begin = stack_trace->getOffset();

            Array frame_addresses;
            frame_addresses.reserve(frames_end - frames_begin);
            for (size_t frame_idx = frames_begin; frame_idx != frames_end && frame_idx < frames_end; ++frame_idx)
                frame_addresses.emplace_back(reinterpret_cast<intptr_t>(stack_trace->getFrames()[frame_idx]));

            res_columns[0]->insert(thread_number);
            res_columns[1]->insertData(query_id_data, query_id_size);
            res_columns[2]->insert(frame_addresses);
        }

        /// Advance the request number, so that a late answer to this request is recognized and ignored.
        /// The addition is done in unsigned arithmetic to avoid signed overflow.
        sequence_num = static_cast<int>(static_cast<unsigned>(sequence_num) + 1);
    }
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2019-12-23 18:56:57 +00:00
|
|
|
#endif
|