mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge pull request #61109 from ClickHouse/signal_handlers_in_client
Add signal handlers in client and local
This commit is contained in:
commit
a56e08ef8a
@ -21,6 +21,7 @@
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Common/NetException.h>
|
||||
#include <Common/SignalHandlers.h>
|
||||
#include <Common/tryGetFileNameByFileDescriptor.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
@ -302,7 +303,13 @@ public:
|
||||
};
|
||||
|
||||
|
||||
ClientBase::~ClientBase() = default;
|
||||
ClientBase::~ClientBase()
|
||||
{
|
||||
writeSignalIDtoSignalPipe(SignalListener::StopThread);
|
||||
signal_listener_thread.join();
|
||||
HandledSignals::instance().reset();
|
||||
}
|
||||
|
||||
ClientBase::ClientBase(
|
||||
int in_fd_,
|
||||
int out_fd_,
|
||||
@ -3072,6 +3079,8 @@ void ClientBase::init(int argc, char ** argv)
|
||||
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
|
||||
|
||||
("fuzzer-args", po::value<std::string>(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.")
|
||||
|
||||
("client_logs_file", po::value<std::string>(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)")
|
||||
;
|
||||
|
||||
addOptions(options_description);
|
||||
@ -3236,6 +3245,25 @@ void ClientBase::init(int argc, char ** argv)
|
||||
total_memory_tracker.setDescription("(total)");
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
}
|
||||
|
||||
/// Print stacktrace in case of crash
|
||||
HandledSignals::instance().setupTerminateHandler();
|
||||
HandledSignals::instance().setupCommonDeadlySignalHandlers();
|
||||
/// We don't setup signal handlers for SIGINT, SIGQUIT, SIGTERM because we don't
|
||||
/// have an option for client to shutdown gracefully.
|
||||
|
||||
fatal_channel_ptr = new Poco::SplitterChannel;
|
||||
fatal_console_channel_ptr = new Poco::ConsoleChannel;
|
||||
fatal_channel_ptr->addChannel(fatal_console_channel_ptr);
|
||||
if (options.count("client_logs_file"))
|
||||
{
|
||||
fatal_file_channel_ptr = new Poco::SimpleFileChannel(options["client_logs_file"].as<std::string>());
|
||||
fatal_channel_ptr->addChannel(fatal_file_channel_ptr);
|
||||
}
|
||||
|
||||
fatal_log = createLogger("ClientBase", fatal_channel_ptr.get(), Poco::Message::PRIO_FATAL);
|
||||
signal_listener = std::make_unique<SignalListener>(nullptr, fatal_log);
|
||||
signal_listener_thread.start(*signal_listener);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -12,6 +12,9 @@
|
||||
#include <Core/ExternalTable.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/SimpleFileChannel.h>
|
||||
#include <Poco/SplitterChannel.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Client/Suggest.h>
|
||||
#include <boost/program_options.hpp>
|
||||
@ -212,6 +215,13 @@ protected:
|
||||
SharedContextHolder shared_context;
|
||||
ContextMutablePtr global_context;
|
||||
|
||||
LoggerPtr fatal_log;
|
||||
Poco::AutoPtr<Poco::SplitterChannel> fatal_channel_ptr;
|
||||
Poco::AutoPtr<Poco::Channel> fatal_console_channel_ptr;
|
||||
Poco::AutoPtr<Poco::Channel> fatal_file_channel_ptr;
|
||||
Poco::Thread signal_listener_thread;
|
||||
std::unique_ptr<Poco::Runnable> signal_listener;
|
||||
|
||||
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
|
||||
bool is_multiquery = false;
|
||||
bool delayed_interactive = false;
|
||||
|
635
src/Common/SignalHandlers.cpp
Normal file
635
src/Common/SignalHandlers.cpp
Normal file
@ -0,0 +1,635 @@
|
||||
#include <Common/SignalHandlers.h>
|
||||
#include <Common/config_version.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Daemon/BaseDaemon.h>
|
||||
#include <Daemon/SentryWriter.h>
|
||||
#include <base/sleep.h>
|
||||
#include <base/getThreadId.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_SET_SIGNAL_HANDLER;
|
||||
extern const int CANNOT_SEND_SIGNAL;
|
||||
}
|
||||
}
|
||||
|
||||
using namespace DB;
|
||||
|
||||
|
||||
void call_default_signal_handler(int sig)
|
||||
{
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler");
|
||||
|
||||
if (0 != raise(sig))
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SEND_SIGNAL, "Cannot send signal");
|
||||
}
|
||||
|
||||
|
||||
void writeSignalIDtoSignalPipe(int sig)
|
||||
{
|
||||
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
writeBinary(sig, out);
|
||||
out.next();
|
||||
|
||||
errno = saved_errno;
|
||||
}
|
||||
|
||||
void closeLogsSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
|
||||
void signalHandler(int sig, siginfo_t * info, void * context)
|
||||
{
|
||||
if (asynchronous_stack_unwinding && sig == SIGSEGV)
|
||||
siglongjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1);
|
||||
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
|
||||
const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
|
||||
const StackTrace stack_trace(*signal_context);
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (const auto fault_address = reinterpret_cast<uintptr_t>(info->si_addr);
|
||||
GWPAsan::isGWPAsanError(fault_address))
|
||||
GWPAsan::printReport(fault_address);
|
||||
#endif
|
||||
|
||||
writeBinary(sig, out);
|
||||
writePODBinary(*info, out);
|
||||
writePODBinary(signal_context, out);
|
||||
writePODBinary(stack_trace, out);
|
||||
writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector<StackTrace::FramePointers>{}, out);
|
||||
writeBinary(static_cast<UInt32>(getThreadId()), out);
|
||||
writePODBinary(current_thread, out);
|
||||
|
||||
out.next();
|
||||
|
||||
if (sig != SIGTSTP) /// This signal is used for debugging.
|
||||
{
|
||||
/// The time that is usually enough for separate thread to print info into log.
|
||||
/// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case.
|
||||
for (size_t i = 0; i < 300; ++i)
|
||||
{
|
||||
/// We will synchronize with the thread printing the messages with an atomic variable to finish earlier.
|
||||
if (HandledSignals::instance().fatal_error_printed.test())
|
||||
break;
|
||||
|
||||
/// This coarse method of synchronization is perfectly ok for fatal signals.
|
||||
sleepForSeconds(1);
|
||||
}
|
||||
|
||||
/// Wait for all logs flush operations
|
||||
sleepForSeconds(3);
|
||||
call_default_signal_handler(sig);
|
||||
}
|
||||
|
||||
errno = saved_errno;
|
||||
}
|
||||
|
||||
|
||||
[[noreturn]] void terminate_handler()
|
||||
{
|
||||
static thread_local bool terminating = false;
|
||||
if (terminating)
|
||||
abort();
|
||||
|
||||
terminating = true;
|
||||
|
||||
std::string log_message;
|
||||
|
||||
if (std::current_exception())
|
||||
log_message = "Terminate called for uncaught exception:\n" + getCurrentExceptionMessage(true);
|
||||
else
|
||||
log_message = "Terminate called without an active exception";
|
||||
|
||||
/// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe
|
||||
/// And the buffer should not be too small because our exception messages can be large.
|
||||
static constexpr size_t buf_size = PIPE_BUF;
|
||||
|
||||
if (log_message.size() > buf_size - 16)
|
||||
log_message.resize(buf_size - 16);
|
||||
|
||||
char buf[buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], buf_size, buf);
|
||||
|
||||
writeBinary(static_cast<int>(SignalListener::StdTerminate), out);
|
||||
writeBinary(static_cast<UInt32>(getThreadId()), out);
|
||||
writeBinary(log_message, out);
|
||||
out.next();
|
||||
|
||||
abort();
|
||||
}
|
||||
|
||||
#if defined(SANITIZER)
|
||||
template <typename T>
|
||||
struct ValueHolder
|
||||
{
|
||||
ValueHolder(T value_) : value(value_)
|
||||
{}
|
||||
|
||||
T value;
|
||||
};
|
||||
|
||||
extern "C" void __sanitizer_set_death_callback(void (*)());
|
||||
|
||||
/// Sanitizers may not expect some function calls from death callback.
|
||||
/// Let's try to disable instrumentation to avoid possible issues.
|
||||
/// However, this callback may call other functions that are still instrumented.
|
||||
/// We can try [[clang::always_inline]] attribute for statements in future (available in clang-15)
|
||||
/// See https://github.com/google/sanitizers/issues/1543 and https://github.com/google/sanitizers/issues/1549.
|
||||
static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback()
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
/// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
|
||||
const StackTrace stack_trace;
|
||||
|
||||
writeBinary(SignalListener::SanitizerTrap, out);
|
||||
writePODBinary(stack_trace, out);
|
||||
/// We create a dummy struct with a constructor so DISABLE_SANITIZER_INSTRUMENTATION is not applied to it
|
||||
/// otherwise, Memory sanitizer can't know that values initiialized inside this function are actually initialized
|
||||
/// because instrumentations are disabled leading to false positives later on
|
||||
ValueHolder<UInt32> thread_id{static_cast<UInt32>(getThreadId())};
|
||||
writeBinary(thread_id.value, out);
|
||||
writePODBinary(current_thread, out);
|
||||
|
||||
out.next();
|
||||
|
||||
/// The time that is usually enough for separate thread to print info into log.
|
||||
sleepForSeconds(20);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void HandledSignals::addSignalHandler(const std::vector<int> & signals, signal_function handler, bool register_signal)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_sigaction = handler;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sa.sa_mask);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sa.sa_mask, signal);
|
||||
#else
|
||||
if (sigemptyset(&sa.sa_mask))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sa.sa_mask, signal))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
#endif
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaction(signal, &sa, nullptr))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
if (register_signal)
|
||||
std::copy(signals.begin(), signals.end(), std::back_inserter(handled_signals));
|
||||
}
|
||||
|
||||
void blockSignals(const std::vector<int> & signals)
|
||||
{
|
||||
sigset_t sig_set;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sig_set);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sig_set, signal);
|
||||
#else
|
||||
if (sigemptyset(&sig_set))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sig_set, signal))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
#endif
|
||||
|
||||
if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
}
|
||||
|
||||
|
||||
void SignalListener::run()
|
||||
{
|
||||
static_assert(PIPE_BUF >= 512);
|
||||
static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
int sig = 0;
|
||||
readBinary(sig, in);
|
||||
// We may log some specific signals afterwards, with different log
|
||||
// levels and more info, but for completeness we log all signals
|
||||
// here at trace level.
|
||||
// Don't use strsignal here, because it's not thread-safe.
|
||||
LOG_TRACE(log, "Received signal {}", sig);
|
||||
|
||||
if (sig == StopThread)
|
||||
{
|
||||
LOG_INFO(log, "Stop SignalListener thread");
|
||||
break;
|
||||
}
|
||||
else if (sig == SIGHUP)
|
||||
{
|
||||
LOG_DEBUG(log, "Received signal to close logs.");
|
||||
BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger());
|
||||
LOG_INFO(log, "Opened new log file after received signal.");
|
||||
}
|
||||
else if (sig == StdTerminate)
|
||||
{
|
||||
UInt32 thread_num;
|
||||
std::string message;
|
||||
|
||||
readBinary(thread_num, in);
|
||||
readBinary(message, in);
|
||||
|
||||
onTerminate(message, thread_num);
|
||||
}
|
||||
else if (sig == SIGINT ||
|
||||
sig == SIGQUIT ||
|
||||
sig == SIGTERM)
|
||||
{
|
||||
if (daemon)
|
||||
daemon->handleSignal(sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
siginfo_t info{};
|
||||
ucontext_t * context{};
|
||||
StackTrace stack_trace(NoCapture{});
|
||||
std::vector<StackTrace::FramePointers> thread_frame_pointers;
|
||||
UInt32 thread_num{};
|
||||
ThreadStatus * thread_ptr{};
|
||||
|
||||
if (sig != SanitizerTrap)
|
||||
{
|
||||
readPODBinary(info, in);
|
||||
readPODBinary(context, in);
|
||||
}
|
||||
|
||||
readPODBinary(stack_trace, in);
|
||||
if (sig != SanitizerTrap)
|
||||
readVectorBinary(thread_frame_pointers, in);
|
||||
readBinary(thread_num, in);
|
||||
readPODBinary(thread_ptr, in);
|
||||
|
||||
/// This allows to receive more signals if failure happens inside onFault function.
|
||||
/// Example: segfault while symbolizing stack trace.
|
||||
try
|
||||
{
|
||||
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr); }).detach();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Likely cannot allocate thread
|
||||
onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SignalListener::onTerminate(std::string_view message, UInt32 thread_num) const
|
||||
{
|
||||
size_t pos = message.find('\n');
|
||||
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", thread_num, message.substr(0, pos));
|
||||
|
||||
/// Print trace from std::terminate exception line-by-line to make it easy for grep.
|
||||
while (pos != std::string_view::npos)
|
||||
{
|
||||
++pos;
|
||||
size_t next_pos = message.find('\n', pos);
|
||||
size_t size = next_pos;
|
||||
if (next_pos != std::string_view::npos)
|
||||
size = next_pos - pos;
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(message.substr(pos, size)));
|
||||
pos = next_pos;
|
||||
}
|
||||
}
|
||||
|
||||
void SignalListener::onFault(
|
||||
int sig,
|
||||
const siginfo_t & info,
|
||||
ucontext_t * context,
|
||||
const StackTrace & stack_trace,
|
||||
const std::vector<StackTrace::FramePointers> & thread_frame_pointers,
|
||||
UInt32 thread_num,
|
||||
DB::ThreadStatus * thread_ptr) const
|
||||
try
|
||||
{
|
||||
ThreadStatus thread_status;
|
||||
|
||||
/// First log those fields that are safe to access and that should not cause new fault.
|
||||
/// That way we will have some duplicated info in the log but we don't loose important info
|
||||
/// in case of double fault.
|
||||
|
||||
LOG_FATAL(log, "########## Short fault info ############");
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
|
||||
thread_num, sig);
|
||||
|
||||
std::string signal_description = "Unknown signal";
|
||||
|
||||
/// Some of these are not really signals, but our own indications on failure reason.
|
||||
if (sig == StdTerminate)
|
||||
signal_description = "std::terminate";
|
||||
else if (sig == SanitizerTrap)
|
||||
signal_description = "sanitizer trap";
|
||||
else if (sig >= 0)
|
||||
signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
|
||||
|
||||
LOG_FATAL(log, "Signal description: {}", signal_description);
|
||||
|
||||
String error_message;
|
||||
|
||||
if (sig != SanitizerTrap)
|
||||
error_message = signalToErrorMessage(sig, info, *context);
|
||||
else
|
||||
error_message = "Sanitizer trap.";
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(error_message));
|
||||
|
||||
String bare_stacktrace_str;
|
||||
if (stack_trace.getSize())
|
||||
{
|
||||
/// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
|
||||
/// NOTE: This still require memory allocations and mutex lock inside logger.
|
||||
/// BTW we can also print it to stderr using write syscalls.
|
||||
|
||||
WriteBufferFromOwnString bare_stacktrace;
|
||||
writeString("Stack trace:", bare_stacktrace);
|
||||
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
|
||||
{
|
||||
writeChar(' ', bare_stacktrace);
|
||||
writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
|
||||
}
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
|
||||
bare_stacktrace_str = bare_stacktrace.str();
|
||||
}
|
||||
|
||||
/// Now try to access potentially unsafe data in thread_ptr.
|
||||
|
||||
String query_id;
|
||||
String query;
|
||||
|
||||
/// Send logs from this thread to client if possible.
|
||||
/// It will allow client to see failure messages directly.
|
||||
if (thread_ptr)
|
||||
{
|
||||
query_id = thread_ptr->getQueryId();
|
||||
query = thread_ptr->getQueryForLog();
|
||||
|
||||
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
|
||||
{
|
||||
CurrentThread::attachInternalTextLogsQueue(logs_queue, LogsLevel::trace);
|
||||
}
|
||||
}
|
||||
|
||||
LOG_FATAL(log, "########################################");
|
||||
|
||||
if (query_id.empty())
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
|
||||
thread_num, signal_description, sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
|
||||
thread_num, query_id, query, signal_description, sig);
|
||||
}
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(error_message));
|
||||
|
||||
if (!bare_stacktrace_str.empty())
|
||||
{
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace_str));
|
||||
}
|
||||
|
||||
/// Write symbolized stack trace line by line for better grep-ability.
|
||||
stack_trace.toStringEveryLine([&](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); });
|
||||
|
||||
/// In case it's a scheduled job write all previous jobs origins call stacks
|
||||
std::for_each(thread_frame_pointers.rbegin(), thread_frame_pointers.rend(),
|
||||
[this](const StackTrace::FramePointers & frame_pointers)
|
||||
{
|
||||
if (size_t size = std::ranges::find(frame_pointers, nullptr) - frame_pointers.begin())
|
||||
{
|
||||
LOG_FATAL(log, "========================================");
|
||||
WriteBufferFromOwnString bare_stacktrace;
|
||||
writeString("Job's origin stack trace:", bare_stacktrace);
|
||||
std::for_each_n(frame_pointers.begin(), size,
|
||||
[&bare_stacktrace](const void * ptr)
|
||||
{
|
||||
writeChar(' ', bare_stacktrace);
|
||||
writePointerHex(ptr, bare_stacktrace);
|
||||
}
|
||||
);
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
|
||||
|
||||
StackTrace::toStringEveryLine(const_cast<void **>(frame_pointers.data()), 0, size, [this](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); });
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
|
||||
/// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp
|
||||
|
||||
if (daemon && daemon->stored_binary_hash.empty())
|
||||
{
|
||||
LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read.");
|
||||
}
|
||||
else if (daemon)
|
||||
{
|
||||
String calculated_binary_hash = getHashOfLoadedBinaryHex();
|
||||
if (calculated_binary_hash == daemon->stored_binary_hash)
|
||||
{
|
||||
LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(
|
||||
log,
|
||||
"Calculated checksum of the executable ({0}) does not correspond"
|
||||
" to the reference checksum stored in the executable ({1})."
|
||||
" This may indicate one of the following:"
|
||||
" - the executable was changed just after startup;"
|
||||
" - the executable was corrupted on disk due to faulty hardware;"
|
||||
" - the loaded executable was corrupted in memory due to faulty hardware;"
|
||||
" - the file was intentionally modified;"
|
||||
" - a logical error in the code.",
|
||||
calculated_binary_hash,
|
||||
daemon->stored_binary_hash);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Write crash to system.crash_log table if available.
|
||||
if (collectCrashLog)
|
||||
collectCrashLog(sig, thread_num, query_id, stack_trace);
|
||||
|
||||
Context::getGlobalContextInstance()->handleCrash();
|
||||
|
||||
/// Send crash report to developers (if configured)
|
||||
if (sig != SanitizerTrap)
|
||||
{
|
||||
if (daemon)
|
||||
{
|
||||
if (auto * sentry = SentryWriter::getInstance())
|
||||
sentry->onSignal(sig, error_message, stack_trace.getFramePointers(), stack_trace.getOffset(), stack_trace.getSize());
|
||||
}
|
||||
|
||||
/// Advice the user to send it manually.
|
||||
if (std::string_view(VERSION_OFFICIAL).contains("official build"))
|
||||
{
|
||||
const auto & date_lut = DateLUT::instance();
|
||||
|
||||
/// Approximate support period, upper bound.
|
||||
if (time(nullptr) - date_lut.makeDate(2000 + VERSION_MAJOR, VERSION_MINOR, 1) < (365 + 30) * 86400)
|
||||
{
|
||||
LOG_FATAL(log, "Report this error to https://github.com/ClickHouse/ClickHouse/issues");
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "ClickHouse version {} is old and should be upgraded to the latest version.", VERSION_STRING);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build.");
|
||||
}
|
||||
}
|
||||
|
||||
/// List changed settings.
|
||||
if (!query_id.empty())
|
||||
{
|
||||
ContextPtr query_context = thread_ptr->getQueryContext();
|
||||
if (query_context)
|
||||
{
|
||||
String changed_settings = query_context->getSettingsRef().toString();
|
||||
|
||||
if (changed_settings.empty())
|
||||
LOG_FATAL(log, "No settings were changed");
|
||||
else
|
||||
LOG_FATAL(log, "Changed settings: {}", changed_settings);
|
||||
}
|
||||
}
|
||||
|
||||
/// When everything is done, we will try to send these error messages to the client.
|
||||
if (thread_ptr)
|
||||
thread_ptr->onFatalError();
|
||||
|
||||
HandledSignals::instance().fatal_error_printed.test_and_set();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors.
|
||||
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
|
||||
LOG_FATAL(log, message);
|
||||
}
|
||||
|
||||
HandledSignals::HandledSignals()
|
||||
{
|
||||
signal_pipe.setNonBlockingWrite();
|
||||
signal_pipe.tryIncreaseSize(1 << 20);
|
||||
}
|
||||
|
||||
void HandledSignals::reset()
|
||||
{
|
||||
/// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after.
|
||||
for (int sig : handled_signals)
|
||||
{
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
{
|
||||
try
|
||||
{
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler");
|
||||
}
|
||||
catch (ErrnoException &)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
signal_pipe.close();
|
||||
}
|
||||
|
||||
HandledSignals::~HandledSignals()
|
||||
{
|
||||
reset();
|
||||
};
|
||||
|
||||
HandledSignals & HandledSignals::instance()
|
||||
{
|
||||
static HandledSignals res;
|
||||
return res;
|
||||
}
|
||||
|
||||
void HandledSignals::setupTerminateHandler()
|
||||
{
|
||||
std::set_terminate(terminate_handler);
|
||||
}
|
||||
|
||||
void HandledSignals::setupCommonDeadlySignalHandlers()
|
||||
{
|
||||
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
|
||||
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, true);
|
||||
|
||||
#if defined(SANITIZER)
|
||||
__sanitizer_set_death_callback(sanitizerDeathCallback);
|
||||
#endif
|
||||
}
|
||||
|
||||
void HandledSignals::setupCommonTerminateRequestSignalHandlers()
|
||||
{
|
||||
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, true);
|
||||
}
|
110
src/Common/SignalHandlers.h
Normal file
110
src/Common/SignalHandlers.h
Normal file
@ -0,0 +1,110 @@
|
||||
#pragma once
|
||||
#include <csignal>
|
||||
|
||||
#include <base/defines.h>
|
||||
#include <Common/PipeFDs.h>
|
||||
#include <Common/StackTrace.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Poco/Runnable.h>
|
||||
|
||||
class BaseDaemon;
|
||||
|
||||
/** Reset signal handler to the default and send signal to itself.
|
||||
* It's called from user signal handler to write core dump.
|
||||
*/
|
||||
void call_default_signal_handler(int sig);
|
||||
|
||||
const size_t signal_pipe_buf_size =
|
||||
sizeof(int)
|
||||
+ sizeof(siginfo_t)
|
||||
+ sizeof(ucontext_t*)
|
||||
+ sizeof(StackTrace)
|
||||
+ sizeof(UInt64)
|
||||
+ sizeof(UInt32)
|
||||
+ sizeof(void*);
|
||||
|
||||
using signal_function = void(int, siginfo_t*, void*);
|
||||
|
||||
void writeSignalIDtoSignalPipe(int sig);
|
||||
|
||||
/** Signal handler for HUP */
|
||||
void closeLogsSignalHandler(int sig, siginfo_t *, void *);
|
||||
|
||||
void terminateRequestedSignalHandler(int sig, siginfo_t *, void *);
|
||||
|
||||
|
||||
/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
|
||||
*/
|
||||
void signalHandler(int sig, siginfo_t * info, void * context);
|
||||
|
||||
|
||||
/** To use with std::set_terminate.
|
||||
* Collects slightly more info than __gnu_cxx::__verbose_terminate_handler,
|
||||
* and send it to pipe. Other thread will read this info from pipe and asynchronously write it to log.
|
||||
* Look at libstdc++-v3/libsupc++/vterminate.cc for example.
|
||||
*/
|
||||
[[noreturn]] void terminate_handler();
|
||||
|
||||
/// Avoid link time dependency on DB/Interpreters - will use this function only when linked.
|
||||
__attribute__((__weak__)) void collectCrashLog(
|
||||
Int32 signal, UInt64 thread_id, const String & query_id, const StackTrace & stack_trace);
|
||||
|
||||
|
||||
void blockSignals(const std::vector<int> & signals);
|
||||
|
||||
|
||||
/** The thread that read info about signal or std::terminate from pipe.
|
||||
* On HUP, close log files (for new files to be opened later).
|
||||
* On information about std::terminate, write it to log.
|
||||
* On other signals, write info to log.
|
||||
*/
|
||||
class SignalListener : public Poco::Runnable
|
||||
{
|
||||
public:
|
||||
static constexpr int StdTerminate = -1;
|
||||
static constexpr int StopThread = -2;
|
||||
static constexpr int SanitizerTrap = -3;
|
||||
|
||||
explicit SignalListener(BaseDaemon * daemon_, LoggerPtr log_)
|
||||
: daemon(daemon_), log(log_)
|
||||
{
|
||||
}
|
||||
|
||||
void run() override;
|
||||
|
||||
private:
|
||||
BaseDaemon * daemon;
|
||||
LoggerPtr log;
|
||||
|
||||
void onTerminate(std::string_view message, UInt32 thread_num) const;
|
||||
|
||||
void onFault(
|
||||
int sig,
|
||||
const siginfo_t & info,
|
||||
ucontext_t * context,
|
||||
const StackTrace & stack_trace,
|
||||
const std::vector<StackTrace::FramePointers> & thread_frame_pointers,
|
||||
UInt32 thread_num,
|
||||
DB::ThreadStatus * thread_ptr) const;
|
||||
};
|
||||
|
||||
struct HandledSignals
|
||||
{
|
||||
std::vector<int> handled_signals;
|
||||
DB::PipeFDs signal_pipe;
|
||||
std::atomic_flag fatal_error_printed;
|
||||
|
||||
HandledSignals();
|
||||
~HandledSignals();
|
||||
|
||||
void setupTerminateHandler();
|
||||
void setupCommonDeadlySignalHandlers();
|
||||
void setupCommonTerminateRequestSignalHandlers();
|
||||
|
||||
void addSignalHandler(const std::vector<int> & signals, signal_function handler, bool register_signal);
|
||||
|
||||
void reset();
|
||||
|
||||
static HandledSignals & instance();
|
||||
};
|
@ -20,7 +20,6 @@
|
||||
#endif
|
||||
#include <cerrno>
|
||||
#include <cstring>
|
||||
#include <csignal>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
@ -37,6 +36,7 @@
|
||||
#include <Poco/Pipe.h>
|
||||
|
||||
#include <Common/ErrorHandlers.h>
|
||||
#include <Common/SignalHandlers.h>
|
||||
#include <base/argsToConfig.h>
|
||||
#include <base/getThreadId.h>
|
||||
#include <base/coverage.h>
|
||||
@ -54,7 +54,6 @@
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
#include <Common/SymbolIndex.h>
|
||||
#include <Common/getExecutablePath.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/Elf.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/logger_useful.h>
|
||||
@ -79,8 +78,6 @@ namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_SET_SIGNAL_HANDLER;
|
||||
extern const int CANNOT_SEND_SIGNAL;
|
||||
extern const int SYSTEM_ERROR;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
@ -88,114 +85,6 @@ namespace DB
|
||||
|
||||
using namespace DB;
|
||||
|
||||
PipeFDs signal_pipe;
|
||||
|
||||
|
||||
/** Reset signal handler to the default and send signal to itself.
|
||||
* It's called from user signal handler to write core dump.
|
||||
*/
|
||||
static void call_default_signal_handler(int sig)
|
||||
{
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler");
|
||||
|
||||
if (0 != raise(sig))
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SEND_SIGNAL, "Cannot send signal");
|
||||
}
|
||||
|
||||
static const size_t signal_pipe_buf_size =
|
||||
sizeof(int)
|
||||
+ sizeof(siginfo_t)
|
||||
+ sizeof(ucontext_t*)
|
||||
+ sizeof(StackTrace)
|
||||
+ sizeof(UInt64)
|
||||
+ sizeof(UInt32)
|
||||
+ sizeof(void*);
|
||||
|
||||
using signal_function = void(int, siginfo_t*, void*);
|
||||
|
||||
static void writeSignalIDtoSignalPipe(int sig)
|
||||
{
|
||||
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
writeBinary(sig, out);
|
||||
out.next();
|
||||
|
||||
errno = saved_errno;
|
||||
}
|
||||
|
||||
/** Signal handler for HUP */
|
||||
static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
|
||||
static std::atomic_flag fatal_error_printed;
|
||||
|
||||
/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
|
||||
*/
|
||||
static void signalHandler(int sig, siginfo_t * info, void * context)
|
||||
{
|
||||
if (asynchronous_stack_unwinding && sig == SIGSEGV)
|
||||
siglongjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1);
|
||||
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
|
||||
const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
|
||||
const StackTrace stack_trace(*signal_context);
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (const auto fault_address = reinterpret_cast<uintptr_t>(info->si_addr);
|
||||
GWPAsan::isGWPAsanError(fault_address))
|
||||
GWPAsan::printReport(fault_address);
|
||||
#endif
|
||||
|
||||
writeBinary(sig, out);
|
||||
writePODBinary(*info, out);
|
||||
writePODBinary(signal_context, out);
|
||||
writePODBinary(stack_trace, out);
|
||||
writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector<StackTrace::FramePointers>{}, out);
|
||||
writeBinary(static_cast<UInt32>(getThreadId()), out);
|
||||
writePODBinary(current_thread, out);
|
||||
|
||||
out.next();
|
||||
|
||||
if (sig != SIGTSTP) /// This signal is used for debugging.
|
||||
{
|
||||
/// The time that is usually enough for separate thread to print info into log.
|
||||
/// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case.
|
||||
for (size_t i = 0; i < 300; ++i)
|
||||
{
|
||||
/// We will synchronize with the thread printing the messages with an atomic variable to finish earlier.
|
||||
if (fatal_error_printed.test())
|
||||
break;
|
||||
|
||||
/// This coarse method of synchronization is perfectly ok for fatal signals.
|
||||
sleepForSeconds(1);
|
||||
}
|
||||
|
||||
/// Wait for all logs flush operations
|
||||
sleepForSeconds(3);
|
||||
call_default_signal_handler(sig);
|
||||
}
|
||||
|
||||
errno = saved_errno;
|
||||
}
|
||||
|
||||
|
||||
static bool getenvBool(const char * name)
|
||||
{
|
||||
@ -207,445 +96,6 @@ static bool getenvBool(const char * name)
|
||||
}
|
||||
|
||||
|
||||
/// Avoid link time dependency on DB/Interpreters - will use this function only when linked.
|
||||
__attribute__((__weak__)) void collectCrashLog(
|
||||
Int32 signal, UInt64 thread_id, const String & query_id, const StackTrace & stack_trace);
|
||||
|
||||
|
||||
/** The thread that read info about signal or std::terminate from pipe.
|
||||
* On HUP, close log files (for new files to be opened later).
|
||||
* On information about std::terminate, write it to log.
|
||||
* On other signals, write info to log.
|
||||
*/
|
||||
class SignalListener : public Poco::Runnable
|
||||
{
|
||||
public:
|
||||
static constexpr int StdTerminate = -1;
|
||||
static constexpr int StopThread = -2;
|
||||
static constexpr int SanitizerTrap = -3;
|
||||
|
||||
explicit SignalListener(BaseDaemon & daemon_)
|
||||
: log(getLogger("BaseDaemon"))
|
||||
, daemon(daemon_)
|
||||
{
|
||||
}
|
||||
|
||||
void run() override
|
||||
{
|
||||
static_assert(PIPE_BUF >= 512);
|
||||
static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
|
||||
char buf[signal_pipe_buf_size];
|
||||
ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
int sig = 0;
|
||||
readBinary(sig, in);
|
||||
// We may log some specific signals afterwards, with different log
|
||||
// levels and more info, but for completeness we log all signals
|
||||
// here at trace level.
|
||||
// Don't use strsignal here, because it's not thread-safe.
|
||||
LOG_TRACE(log, "Received signal {}", sig);
|
||||
|
||||
if (sig == StopThread)
|
||||
{
|
||||
LOG_INFO(log, "Stop SignalListener thread");
|
||||
break;
|
||||
}
|
||||
else if (sig == SIGHUP)
|
||||
{
|
||||
LOG_DEBUG(log, "Received signal to close logs.");
|
||||
BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger());
|
||||
LOG_INFO(log, "Opened new log file after received signal.");
|
||||
}
|
||||
else if (sig == StdTerminate)
|
||||
{
|
||||
UInt32 thread_num;
|
||||
std::string message;
|
||||
|
||||
readBinary(thread_num, in);
|
||||
readBinary(message, in);
|
||||
|
||||
onTerminate(message, thread_num);
|
||||
}
|
||||
else if (sig == SIGINT ||
|
||||
sig == SIGQUIT ||
|
||||
sig == SIGTERM)
|
||||
{
|
||||
daemon.handleSignal(sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
siginfo_t info{};
|
||||
ucontext_t * context{};
|
||||
StackTrace stack_trace(NoCapture{});
|
||||
std::vector<StackTrace::FramePointers> thread_frame_pointers;
|
||||
UInt32 thread_num{};
|
||||
ThreadStatus * thread_ptr{};
|
||||
|
||||
if (sig != SanitizerTrap)
|
||||
{
|
||||
readPODBinary(info, in);
|
||||
readPODBinary(context, in);
|
||||
}
|
||||
|
||||
readPODBinary(stack_trace, in);
|
||||
|
||||
if (sig != SanitizerTrap)
|
||||
readVectorBinary(thread_frame_pointers, in);
|
||||
|
||||
readBinary(thread_num, in);
|
||||
readPODBinary(thread_ptr, in);
|
||||
|
||||
/// This allows to receive more signals if failure happens inside onFault function.
|
||||
/// Example: segfault while symbolizing stack trace.
|
||||
try
|
||||
{
|
||||
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr); }).detach();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Likely cannot allocate thread
|
||||
onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
BaseDaemon & daemon;
|
||||
|
||||
void onTerminate(std::string_view message, UInt32 thread_num) const
|
||||
{
|
||||
size_t pos = message.find('\n');
|
||||
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, thread_num, message.substr(0, pos));
|
||||
|
||||
/// Print trace from std::terminate exception line-by-line to make it easy for grep.
|
||||
while (pos != std::string_view::npos)
|
||||
{
|
||||
++pos;
|
||||
size_t next_pos = message.find('\n', pos);
|
||||
size_t size = next_pos;
|
||||
if (next_pos != std::string_view::npos)
|
||||
size = next_pos - pos;
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(message.substr(pos, size)));
|
||||
pos = next_pos;
|
||||
}
|
||||
}
|
||||
|
||||
void onFault(
|
||||
int sig,
|
||||
const siginfo_t & info,
|
||||
ucontext_t * context,
|
||||
const StackTrace & stack_trace,
|
||||
const std::vector<StackTrace::FramePointers> & thread_frame_pointers,
|
||||
UInt32 thread_num,
|
||||
ThreadStatus * thread_ptr) const
|
||||
try
|
||||
{
|
||||
ThreadStatus thread_status;
|
||||
|
||||
/// First log those fields that are safe to access and that should not cause new fault.
|
||||
/// That way we will have some duplicated info in the log but we don't loose important info
|
||||
/// in case of double fault.
|
||||
|
||||
LOG_FATAL(log, "########## Short fault info ############");
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash,
|
||||
thread_num, sig);
|
||||
|
||||
std::string signal_description = "Unknown signal";
|
||||
|
||||
/// Some of these are not really signals, but our own indications on failure reason.
|
||||
if (sig == StdTerminate)
|
||||
signal_description = "std::terminate";
|
||||
else if (sig == SanitizerTrap)
|
||||
signal_description = "sanitizer trap";
|
||||
else if (sig >= 0)
|
||||
signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
|
||||
|
||||
LOG_FATAL(log, "Signal description: {}", signal_description);
|
||||
|
||||
String error_message;
|
||||
|
||||
if (sig != SanitizerTrap)
|
||||
error_message = signalToErrorMessage(sig, info, *context);
|
||||
else
|
||||
error_message = "Sanitizer trap.";
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(error_message));
|
||||
|
||||
String bare_stacktrace_str;
|
||||
if (stack_trace.getSize())
|
||||
{
|
||||
/// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
|
||||
/// NOTE: This still require memory allocations and mutex lock inside logger.
|
||||
/// BTW we can also print it to stderr using write syscalls.
|
||||
|
||||
WriteBufferFromOwnString bare_stacktrace;
|
||||
writeString("Stack trace:", bare_stacktrace);
|
||||
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
|
||||
{
|
||||
writeChar(' ', bare_stacktrace);
|
||||
writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
|
||||
}
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
|
||||
bare_stacktrace_str = bare_stacktrace.str();
|
||||
}
|
||||
|
||||
/// Now try to access potentially unsafe data in thread_ptr.
|
||||
|
||||
String query_id;
|
||||
String query;
|
||||
|
||||
/// Send logs from this thread to client if possible.
|
||||
/// It will allow client to see failure messages directly.
|
||||
if (thread_ptr)
|
||||
{
|
||||
query_id = thread_ptr->getQueryId();
|
||||
query = thread_ptr->getQueryForLog();
|
||||
|
||||
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
|
||||
{
|
||||
CurrentThread::attachInternalTextLogsQueue(logs_queue, LogsLevel::trace);
|
||||
}
|
||||
}
|
||||
|
||||
LOG_FATAL(log, "########################################");
|
||||
|
||||
if (query_id.empty())
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash,
|
||||
thread_num, signal_description, sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash,
|
||||
thread_num, query_id, query, signal_description, sig);
|
||||
}
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(error_message));
|
||||
|
||||
if (!bare_stacktrace_str.empty())
|
||||
{
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace_str));
|
||||
}
|
||||
|
||||
/// Write symbolized stack trace line by line for better grep-ability.
|
||||
stack_trace.toStringEveryLine([&](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); });
|
||||
|
||||
/// In case it's a scheduled job write all previous jobs origins call stacks
|
||||
std::for_each(thread_frame_pointers.rbegin(), thread_frame_pointers.rend(),
|
||||
[this](const StackTrace::FramePointers & frame_pointers)
|
||||
{
|
||||
if (size_t size = std::ranges::find(frame_pointers, nullptr) - frame_pointers.begin())
|
||||
{
|
||||
LOG_FATAL(log, "========================================");
|
||||
WriteBufferFromOwnString bare_stacktrace;
|
||||
writeString("Job's origin stack trace:", bare_stacktrace);
|
||||
std::for_each_n(frame_pointers.begin(), size,
|
||||
[&bare_stacktrace](const void * ptr)
|
||||
{
|
||||
writeChar(' ', bare_stacktrace);
|
||||
writePointerHex(ptr, bare_stacktrace);
|
||||
}
|
||||
);
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
|
||||
|
||||
StackTrace::toStringEveryLine(const_cast<void **>(frame_pointers.data()), 0, size, [this](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); });
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
|
||||
/// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp
|
||||
|
||||
if (daemon.stored_binary_hash.empty())
|
||||
{
|
||||
LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read.");
|
||||
}
|
||||
else
|
||||
{
|
||||
String calculated_binary_hash = getHashOfLoadedBinaryHex();
|
||||
if (calculated_binary_hash == daemon.stored_binary_hash)
|
||||
{
|
||||
LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(
|
||||
log,
|
||||
"Calculated checksum of the executable ({0}) does not correspond"
|
||||
" to the reference checksum stored in the executable ({1})."
|
||||
" This may indicate one of the following:"
|
||||
" - the executable was changed just after startup;"
|
||||
" - the executable was corrupted on disk due to faulty hardware;"
|
||||
" - the loaded executable was corrupted in memory due to faulty hardware;"
|
||||
" - the file was intentionally modified;"
|
||||
" - a logical error in the code.",
|
||||
calculated_binary_hash,
|
||||
daemon.stored_binary_hash);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Write crash to system.crash_log table if available.
|
||||
if (collectCrashLog)
|
||||
collectCrashLog(sig, thread_num, query_id, stack_trace);
|
||||
|
||||
Context::getGlobalContextInstance()->handleCrash();
|
||||
|
||||
/// Send crash report to developers (if configured)
|
||||
if (sig != SanitizerTrap)
|
||||
{
|
||||
if (auto * sentry = SentryWriter::getInstance())
|
||||
sentry->onSignal(sig, error_message, stack_trace.getFramePointers(), stack_trace.getOffset(), stack_trace.getSize());
|
||||
|
||||
/// Advice the user to send it manually.
|
||||
if (std::string_view(VERSION_OFFICIAL).contains("official build"))
|
||||
{
|
||||
const auto & date_lut = DateLUT::instance();
|
||||
|
||||
/// Approximate support period, upper bound.
|
||||
if (time(nullptr) - date_lut.makeDate(2000 + VERSION_MAJOR, VERSION_MINOR, 1) < (365 + 30) * 86400)
|
||||
{
|
||||
LOG_FATAL(log, "Report this error to https://github.com/ClickHouse/ClickHouse/issues");
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "ClickHouse version {} is old and should be upgraded to the latest version.", VERSION_STRING);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build.");
|
||||
}
|
||||
}
|
||||
|
||||
/// List changed settings.
|
||||
if (!query_id.empty())
|
||||
{
|
||||
ContextPtr query_context = thread_ptr->getQueryContext();
|
||||
if (query_context)
|
||||
{
|
||||
String changed_settings = query_context->getSettingsRef().toString();
|
||||
|
||||
if (changed_settings.empty())
|
||||
LOG_FATAL(log, "No settings were changed");
|
||||
else
|
||||
LOG_FATAL(log, "Changed settings: {}", changed_settings);
|
||||
}
|
||||
}
|
||||
|
||||
/// When everything is done, we will try to send these error messages to the client.
|
||||
if (thread_ptr)
|
||||
thread_ptr->onFatalError();
|
||||
|
||||
fatal_error_printed.test_and_set();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors.
|
||||
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
|
||||
LOG_FATAL(getLogger(__PRETTY_FUNCTION__), message);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#if defined(SANITIZER)
|
||||
|
||||
template <typename T>
|
||||
struct ValueHolder
|
||||
{
|
||||
ValueHolder(T value_) : value(value_)
|
||||
{}
|
||||
|
||||
T value;
|
||||
};
|
||||
|
||||
extern "C" void __sanitizer_set_death_callback(void (*)());
|
||||
|
||||
/// Sanitizers may not expect some function calls from death callback.
|
||||
/// Let's try to disable instrumentation to avoid possible issues.
|
||||
/// However, this callback may call other functions that are still instrumented.
|
||||
/// We can try [[clang::always_inline]] attribute for statements in future (available in clang-15)
|
||||
/// See https://github.com/google/sanitizers/issues/1543 and https://github.com/google/sanitizers/issues/1549.
|
||||
static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback()
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
/// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
|
||||
const StackTrace stack_trace;
|
||||
|
||||
writeBinary(SignalListener::SanitizerTrap, out);
|
||||
writePODBinary(stack_trace, out);
|
||||
/// We create a dummy struct with a constructor so DISABLE_SANITIZER_INSTRUMENTATION is not applied to it
|
||||
/// otherwise, Memory sanitizer can't know that values initiialized inside this function are actually initialized
|
||||
/// because instrumentations are disabled leading to false positives later on
|
||||
ValueHolder<UInt32> thread_id{static_cast<UInt32>(getThreadId())};
|
||||
writeBinary(thread_id.value, out);
|
||||
writePODBinary(current_thread, out);
|
||||
|
||||
out.next();
|
||||
|
||||
/// The time that is usually enough for separate thread to print info into log.
|
||||
sleepForSeconds(20);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/** To use with std::set_terminate.
|
||||
* Collects slightly more info than __gnu_cxx::__verbose_terminate_handler,
|
||||
* and send it to pipe. Other thread will read this info from pipe and asynchronously write it to log.
|
||||
* Look at libstdc++-v3/libsupc++/vterminate.cc for example.
|
||||
*/
|
||||
[[noreturn]] static void terminate_handler()
|
||||
{
|
||||
static thread_local bool terminating = false;
|
||||
if (terminating)
|
||||
abort();
|
||||
|
||||
terminating = true;
|
||||
|
||||
std::string log_message;
|
||||
|
||||
if (std::current_exception())
|
||||
log_message = "Terminate called for uncaught exception:\n" + getCurrentExceptionMessage(true);
|
||||
else
|
||||
log_message = "Terminate called without an active exception";
|
||||
|
||||
/// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe
|
||||
/// And the buffer should not be too small because our exception messages can be large.
|
||||
static constexpr size_t buf_size = PIPE_BUF;
|
||||
|
||||
if (log_message.size() > buf_size - 16)
|
||||
log_message.resize(buf_size - 16);
|
||||
|
||||
char buf[buf_size];
|
||||
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], buf_size, buf);
|
||||
|
||||
writeBinary(static_cast<int>(SignalListener::StdTerminate), out);
|
||||
writeBinary(static_cast<UInt32>(getThreadId()), out);
|
||||
writeBinary(log_message, out);
|
||||
out.next();
|
||||
|
||||
abort();
|
||||
}
|
||||
|
||||
|
||||
static std::string createDirectory(const std::string & file)
|
||||
{
|
||||
fs::path path = fs::path(file).parent_path();
|
||||
@ -698,22 +148,7 @@ BaseDaemon::~BaseDaemon()
|
||||
{
|
||||
writeSignalIDtoSignalPipe(SignalListener::StopThread);
|
||||
signal_listener_thread.join();
|
||||
/// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after.
|
||||
for (int sig : handled_signals)
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
{
|
||||
try
|
||||
{
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler");
|
||||
}
|
||||
catch (ErrnoException &)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
signal_pipe.close();
|
||||
|
||||
HandledSignals::instance().reset();
|
||||
SentryWriter::resetInstance();
|
||||
}
|
||||
|
||||
@ -752,6 +187,8 @@ void BaseDaemon::closeFDs()
|
||||
#else
|
||||
fs::path proc_path{"/proc/self/fd"};
|
||||
#endif
|
||||
|
||||
const auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
if (fs::is_directory(proc_path)) /// Hooray, proc exists
|
||||
{
|
||||
/// in /proc/self/fd directory filenames are numeric file descriptors.
|
||||
@ -972,56 +409,6 @@ void BaseDaemon::initialize(Application & self)
|
||||
}
|
||||
|
||||
|
||||
static void addSignalHandler(const std::vector<int> & signals, signal_function handler, std::vector<int> * out_handled_signals)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_sigaction = handler;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sa.sa_mask);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sa.sa_mask, signal);
|
||||
#else
|
||||
if (sigemptyset(&sa.sa_mask))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sa.sa_mask, signal))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
#endif
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaction(signal, &sa, nullptr))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
if (out_handled_signals)
|
||||
std::copy(signals.begin(), signals.end(), std::back_inserter(*out_handled_signals));
|
||||
}
|
||||
|
||||
|
||||
static void blockSignals(const std::vector<int> & signals)
|
||||
{
|
||||
sigset_t sig_set;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sig_set);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sig_set, signal);
|
||||
#else
|
||||
if (sigemptyset(&sig_set))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sig_set, signal))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
#endif
|
||||
|
||||
if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
}
|
||||
|
||||
extern const char * GIT_HASH;
|
||||
|
||||
void BaseDaemon::initializeTerminationAndSignalProcessing()
|
||||
@ -1045,29 +432,21 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
|
||||
};
|
||||
}
|
||||
}
|
||||
std::set_terminate(terminate_handler);
|
||||
|
||||
/// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead.
|
||||
blockSignals({SIGPIPE});
|
||||
|
||||
/// Setup signal handlers.
|
||||
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
|
||||
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals);
|
||||
addSignalHandler({SIGHUP}, closeLogsSignalHandler, &handled_signals);
|
||||
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals);
|
||||
|
||||
#if defined(SANITIZER)
|
||||
__sanitizer_set_death_callback(sanitizerDeathCallback);
|
||||
#endif
|
||||
HandledSignals::instance().setupTerminateHandler();
|
||||
HandledSignals::instance().setupCommonDeadlySignalHandlers();
|
||||
HandledSignals::instance().setupCommonTerminateRequestSignalHandlers();
|
||||
HandledSignals::instance().addSignalHandler({SIGHUP}, closeLogsSignalHandler, true);
|
||||
|
||||
/// Set up Poco ErrorHandler for Poco Threads.
|
||||
static KillingErrorHandler killing_error_handler;
|
||||
Poco::ErrorHandler::set(&killing_error_handler);
|
||||
|
||||
signal_pipe.setNonBlockingWrite();
|
||||
signal_pipe.tryIncreaseSize(1 << 20);
|
||||
|
||||
signal_listener = std::make_unique<SignalListener>(*this);
|
||||
signal_listener = std::make_unique<SignalListener>(this, getLogger("BaseDaemon"));
|
||||
signal_listener_thread.start(*signal_listener);
|
||||
|
||||
#if defined(__ELF__) && !defined(OS_FREEBSD)
|
||||
@ -1273,7 +652,7 @@ void BaseDaemon::setupWatchdog()
|
||||
/// Forward signals to the child process.
|
||||
if (forward_signals)
|
||||
{
|
||||
addSignalHandler(
|
||||
HandledSignals::instance().addSignalHandler(
|
||||
{SIGHUP, SIGINT, SIGQUIT, SIGTERM},
|
||||
[](int sig, siginfo_t *, void *)
|
||||
{
|
||||
@ -1289,7 +668,7 @@ void BaseDaemon::setupWatchdog()
|
||||
(void)res;
|
||||
}
|
||||
},
|
||||
nullptr);
|
||||
false);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -168,8 +168,6 @@ protected:
|
||||
String git_hash;
|
||||
String stored_binary_hash;
|
||||
|
||||
std::vector<int> handled_signals;
|
||||
|
||||
bool should_setup_watchdog = false;
|
||||
char * argv0 = nullptr;
|
||||
};
|
||||
|
@ -1525,6 +1525,15 @@ class TestCase:
|
||||
start_time = args.testcase_start_time
|
||||
database = args.testcase_database
|
||||
|
||||
if args.client_log:
|
||||
log_opt = " --client_logs_file=" + args.client_log + " "
|
||||
client_options += log_opt
|
||||
os.environ["CLICKHOUSE_CLIENT_OPT"] = (
|
||||
os.environ["CLICKHOUSE_CLIENT_OPT"]
|
||||
if "CLICKHOUSE_CLIENT_OPT" in os.environ
|
||||
else ""
|
||||
) + log_opt
|
||||
|
||||
# This is for .sh tests
|
||||
os.environ["CLICKHOUSE_LOG_COMMENT"] = args.testcase_basename
|
||||
|
||||
@ -2994,6 +3003,15 @@ def main(args):
|
||||
else:
|
||||
print(colored("\nNo queries hung.", args, "green", attrs=["bold"]))
|
||||
|
||||
if args.client_log:
|
||||
if os.path.exists(args.client_log):
|
||||
with open(args.client_log, "rb") as stream:
|
||||
content = stream.read().decode()
|
||||
if len(content):
|
||||
print("Has fatal logs from client:\n")
|
||||
print(content)
|
||||
os.remove(args.client_log)
|
||||
|
||||
if len(restarted_tests) > 0:
|
||||
print("\nSome tests were restarted:\n")
|
||||
|
||||
@ -3395,6 +3413,11 @@ def parse_args():
|
||||
default=os.environ.get("REPLACE_MT_WITH_SMT", False),
|
||||
help="Replace ordinary MergeTree engine with SharedMergeTree",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--client-log",
|
||||
default="./client.fatal.log",
|
||||
help="Path to file for fatal logs from client",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--run-sequential-tests-in-parallel",
|
||||
|
Loading…
Reference in New Issue
Block a user