mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-29 11:02:08 +00:00
177 lines
5.3 KiB
C++
177 lines
5.3 KiB
C++
#include "TraceCollector.h"
|
|
|
|
#include <Core/Field.h>
|
|
#include <IO/ReadBufferFromFileDescriptor.h>
|
|
#include <IO/ReadHelpers.h>
|
|
#include <IO/WriteBufferFromFileDescriptor.h>
|
|
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
|
|
#include <IO/WriteHelpers.h>
|
|
#include <Interpreters/TraceLog.h>
|
|
#include <Poco/Logger.h>
|
|
#include <Common/Exception.h>
|
|
#include <Common/PipeFDs.h>
|
|
#include <Common/StackTrace.h>
|
|
#include <Common/setThreadName.h>
|
|
#include <common/logger_useful.h>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace
{
    /// A query_id is normally a UUID, i.e. a string of fixed length, but the user is free to supply a custom one.
    /// Its length therefore has to be capped to avoid a buffer overflow in the signal handler.
    ///
    /// The cap also has to stay small: a larger one would not fit into PIPE_BUF.
    constexpr char QUERY_ID_UUID_TEMPLATE[] = "00000000-0000-0000-0000-000000000000";

    /// Length of the textual UUID form without the terminating zero byte: 36.
    constexpr size_t QUERY_ID_MAX_LEN = sizeof(QUERY_ID_UUID_TEMPLATE) - 1;
}
|
|
|
|
/// Pipe that carries serialized trace records to the collector thread.
/// It is opened in the TraceCollector constructor and closed in its destructor;
/// collect() and stop() write to fds_rw[1], run() reads from fds_rw[0].
/// Note that it is declared at DB namespace scope, outside of the anonymous namespace above.
LazyPipeFDs pipe;
|
|
|
|
|
|
/// Opens the pipe, configures its write end and starts the thread that executes run().
/// `trace_log_` is the destination of collected traces; run() checks it for null before every use.
TraceCollector::TraceCollector(std::shared_ptr<TraceLog> trace_log_)
    : trace_log(std::move(trace_log_))
{
    /// Size requested for the pipe: 1 MiB.
    constexpr int desired_pipe_size = 1 << 20;

    pipe.open();

    /** The write end must be non-blocking: QueryProfiler may be invoked under locks,
      * and if TraceCollector cannot pull data from the pipe at that moment,
      * a blocking write would result in a deadlock.
      */
    pipe.setNonBlockingWrite();
    pipe.tryIncreaseSize(desired_pipe_size);

    thread = ThreadFromGlobalPool(&TraceCollector::run, this);
}
|
|
|
|
|
|
/// Stops the collector thread (when it is joinable) and closes the pipe.
///
/// No exception is allowed to leave this destructor: destructors are implicitly noexcept,
/// so an escaping exception would call std::terminate without any diagnostics.
/// Both stop() and pipe.close() perform operations on file descriptors; stop() in particular
/// writes to the pipe end that the constructor switched to non-blocking mode.
/// NOTE(review): presumably those calls report failure by throwing - confirm against
/// WriteBufferFromFileDescriptor and LazyPipeFDs.
/// Any such failure is logged here instead of being propagated.
TraceCollector::~TraceCollector()
{
    try
    {
        if (!thread.joinable())
            LOG_ERROR(&Poco::Logger::get("TraceCollector"), "TraceCollector thread is malformed and cannot be joined");
        else
            stop();

        /// Reached only if the thread has been joined (or was never joinable),
        /// so nothing reads from the pipe anymore.
        pipe.close();
    }
    catch (...)
    {
        /// NOTE(review): if stop() failed before joining, `thread` is still joinable here;
        /// what its own destructor does in that state should be checked separately.
        tryLogCurrentException("TraceCollector");
    }
}
|
|
|
|
|
|
/// Serializes one trace record and pushes it into the pipe with a single write.
///
/// Record layout, in order: stop flag (false), query_id as a binary string,
/// number of stack frames (UInt8), the frame pointers, trace type, thread_id, size.
/// run() reads the fields back in the same order.
void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trace, Int64 size)
{
    /// Upper bound for the serialized size of one record.
    constexpr size_t buf_size =
        sizeof(char)                            /// TraceCollector stop flag
        + 8 * sizeof(char)                      /// maximum VarUInt length for string size
        + QUERY_ID_MAX_LEN * sizeof(char)       /// maximum query_id length
        + sizeof(UInt8)                         /// number of stack frames
        + sizeof(StackTrace::FramePointers)     /// collected stack trace, maximum capacity
        + sizeof(TraceType)                     /// trace type
        + sizeof(UInt64)                        /// thread_id
        + sizeof(Int64);                        /// size

    /// The whole record has to be written atomically, otherwise records could overlap
    /// (collect() may be entered recursively).
    static_assert(buf_size < PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic");

    char buffer[buf_size];
    WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer);

    /// query_id stays empty when the current thread is not initialized.
    StringRef query_id;
    UInt64 thread_id;

    const bool thread_is_initialized = CurrentThread::isInitialized();
    if (thread_is_initialized)
    {
        query_id = CurrentThread::getQueryId();

        /// Truncate custom query ids so the record never exceeds buf_size.
        query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);

        thread_id = CurrentThread::get().thread_id;
    }
    else
    {
        thread_id = MainThreadStatus::get()->thread_id;
    }

    /// The flag is true only in the message sent by stop().
    writeChar(false, out);
    writeStringBinary(query_id, out);

    /// Only the frames in [offset, size) are sent.
    const size_t frames_end = stack_trace.getSize();
    const size_t frames_begin = stack_trace.getOffset();
    const UInt8 frames_count = UInt8(frames_end - frames_begin);
    writeIntBinary(frames_count, out);

    const auto & frame_pointers = stack_trace.getFramePointers();
    for (size_t frame = frames_begin; frame < frames_end; ++frame)
        writePODBinary(frame_pointers[frame], out);

    writePODBinary(trace_type, out);
    writePODBinary(thread_id, out);
    writePODBinary(size, out);

    /// Flush the buffer into the pipe.
    out.next();
}
|
|
|
|
|
|
/** Sends TraceCollector stop message
|
|
*
|
|
* Each sequence of data for TraceCollector thread starts with a boolean flag.
|
|
* If this flag is true, TraceCollector must stop reading trace_pipe and exit.
|
|
* This function sends flag with a true value to stop TraceCollector gracefully.
|
|
*/
|
|
void TraceCollector::stop()
|
|
{
|
|
WriteBufferFromFileDescriptor out(pipe.fds_rw[1]);
|
|
writeChar(true, out);
|
|
out.next();
|
|
thread.join();
|
|
}
|
|
|
|
|
|
void TraceCollector::run()
|
|
{
|
|
setThreadName("TraceCollector");
|
|
|
|
ReadBufferFromFileDescriptor in(pipe.fds_rw[0]);
|
|
|
|
while (true)
|
|
{
|
|
char is_last;
|
|
readChar(is_last, in);
|
|
if (is_last)
|
|
break;
|
|
|
|
std::string query_id;
|
|
readStringBinary(query_id, in);
|
|
|
|
UInt8 trace_size = 0;
|
|
readIntBinary(trace_size, in);
|
|
|
|
Array trace;
|
|
trace.reserve(trace_size);
|
|
|
|
for (size_t i = 0; i < trace_size; i++)
|
|
{
|
|
uintptr_t addr = 0;
|
|
readPODBinary(addr, in);
|
|
trace.emplace_back(UInt64(addr));
|
|
}
|
|
|
|
TraceType trace_type;
|
|
readPODBinary(trace_type, in);
|
|
|
|
UInt64 thread_id;
|
|
readPODBinary(thread_id, in);
|
|
|
|
Int64 size;
|
|
readPODBinary(size, in);
|
|
|
|
if (trace_log)
|
|
{
|
|
// time and time_in_microseconds are both being constructed from the same timespec so that the
|
|
// times will be equal up to the precision of a second.
|
|
struct timespec ts;
|
|
clock_gettime(CLOCK_REALTIME, &ts);
|
|
|
|
UInt64 time = UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec);
|
|
UInt64 time_in_microseconds = UInt64((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000));
|
|
TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size};
|
|
trace_log->add(element);
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|