ClickHouse/src/Common/OpenTelemetryTraceContext.cpp

374 lines
11 KiB
C++

#include "Interpreters/OpenTelemetrySpanLog.h"
#include <random>
#include <base/getThreadId.h>
#include <Common/Exception.h>
#include <Common/hex.h>
#include <Core/Settings.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace OpenTelemetry
{
/// Tracing context of the calling thread: being `thread_local`, every thread has its own instance.
/// It is exposed read-only through CurrentContext() and updated by SpanHolder and TracingContextHolder in this file.
thread_local TracingContextOnThread current_thread_trace_context;
/// Attaches a numeric attribute to this span; the number is stored in its textual form.
/// The call is a no-op when tracing is not enabled for the span or when `name` is empty.
void Span::addAttribute(std::string_view name, UInt64 value)
{
    const bool can_record = this->isTraceEnabled() && !name.empty();
    if (can_record)
        this->attributes.push_back(Tuple{name, toString(value)});
}
/// Attaches a numeric attribute only when `value` is non-zero.
/// All remaining checks are performed by the addAttribute() overload it forwards to.
void Span::addAttributeIfNotZero(std::string_view name, UInt64 value)
{
    if (value == 0)
        return;

    addAttribute(name, value);
}
/// Attaches a string attribute to this span.
/// The call is a no-op when tracing is not enabled for the span or when `name` is empty;
/// an empty `value` is still recorded.
void Span::addAttribute(std::string_view name, std::string_view value)
{
    const bool can_record = this->isTraceEnabled() && !name.empty();
    if (can_record)
        this->attributes.push_back(Tuple{name, value});
}
void Span::addAttributeIfNotEmpty(std::string_view name, std::string_view value)
{
if (!this->isTraceEnabled() || name.empty() || value.empty())
return;
this->attributes.push_back(Tuple{name, value});
}
/// Attaches an attribute whose value is produced on demand by `value_supplier`.
///
/// The supplier is invoked only when the attribute can actually be recorded, i.e. when tracing is
/// enabled for the span, `name` is not empty and the supplier holds a callable. An empty result
/// from the supplier is not recorded.
///
/// Like the other addAttribute() overloads, an empty `name` is rejected; the check is done before
/// the supplier runs so that no work is wasted on an attribute that would be unusable.
void Span::addAttribute(std::string_view name, std::function<String()> value_supplier)
{
    if (!this->isTraceEnabled() || name.empty() || !value_supplier)
        return;

    String value = value_supplier();
    if (value.empty())
        return;

    this->attributes.push_back(Tuple{name, value});
}
/// Attaches the message of `e` to this span under the "clickhouse.exception" key.
/// Never throws: a failure while building or storing the attribute is deliberately swallowed.
void Span::addAttribute(const Exception & e) noexcept
{
    if (this->isTraceEnabled())
    {
        try
        {
            this->attributes.push_back(Tuple{"clickhouse.exception", getExceptionMessage(e, false)});
        }
        catch (...)
        {
            /// Best effort only: losing the attribute is preferable to throwing from a noexcept function.
        }
    }
}
/// Attaches the message of the exception referenced by `e` to this span under the "clickhouse.exception" key.
/// Does nothing for a null `e` or when tracing is not enabled for the span.
/// Never throws: a failure while building or storing the attribute is deliberately swallowed.
void Span::addAttribute(std::exception_ptr e) noexcept
{
    if (this->isTraceEnabled() && e != nullptr)
    {
        try
        {
            this->attributes.push_back(Tuple{"clickhouse.exception", getExceptionMessage(e, false)});
        }
        catch (...)
        {
            /// Best effort only: losing the attribute is preferable to throwing from a noexcept function.
        }
    }
}
/// Opens a new span named `_operation_name` as a child of the span currently active on this thread.
/// When no trace is active on the thread, nothing is initialized here.
/// The constructor does not propagate exceptions from the initialization: they are logged and the span is dropped.
SpanHolder::SpanHolder(std::string_view _operation_name)
{
    auto & thread_context = current_thread_trace_context;

    if (!thread_context.isTraceEnabled())
        return;

    /// Everything that may throw lives inside the try block to keep the constructor exception safe.
    try
    {
        this->trace_id = thread_context.trace_id;
        this->parent_span_id = thread_context.span_id;
        this->span_id = thread_local_rng(); /// a fresh random id for this span
        this->operation_name = _operation_name;

        const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
        this->start_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();

        /// Add new initialization here
    }
    catch (...)
    {
        tryLogCurrentException(__FUNCTION__);

        /// Wipe the trace id so that the span won't be recorded.
        this->trace_id = UUID();
        return;
    }

    /// From now on, spans created on this thread become children of this one.
    thread_context.span_id = this->span_id;
}
/// Closes the span: restores the parent span as the current one on this thread and,
/// if the span log is still alive, stamps the finish time and writes the span to the log.
/// Does nothing when tracing is not enabled for this span. Never throws; logging failures are reported
/// through tryLogCurrentException().
void SpanHolder::finish() noexcept
{
    if (!this->isTraceEnabled())
        return;

    auto & thread_context = current_thread_trace_context;

    /// Before anything else, make the parent span current again.
    assert(thread_context.span_id == span_id);
    thread_context.span_id = parent_span_id;

    try
    {
        /// The log is held weakly and might be disabled, so it is only used when it can be locked.
        if (auto log = thread_context.span_log.lock())
        {
            const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
            this->finish_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();

            log->add(OpenTelemetrySpanLogElement(*this));
        }
    }
    catch (...)
    {
        tryLogCurrentException(__FUNCTION__);
    }

    /// Clear the trace id; judging by the comments elsewhere in this file an empty trace id marks the span
    /// as not traced, so a later finish() (e.g. from the destructor) presumably returns early.
    trace_id = UUID();
}
/// Closes the span on scope exit by delegating to finish(), which is noexcept.
SpanHolder::~SpanHolder()
{
    this->finish();
}
bool TracingContext::parseTraceparentHeader(std::string_view traceparent, String & error)
{
trace_id = 0;
// Version 00, which is the only one we can parse, is fixed width. Use this
// fact for an additional sanity check.
const int expected_length = strlen("xx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxx-xx");
if (traceparent.length() != expected_length)
{
error = fmt::format("unexpected length {}, expected {}", traceparent.length(), expected_length);
return false;
}
const char * data = traceparent.data();
uint8_t version = unhex2(data);
data += 2;
if (version != 0)
{
error = fmt::format("unexpected version {}, expected 00", version);
return false;
}
if (*data != '-')
{
error = fmt::format("Malformed traceparant header: {}", traceparent);
return false;
}
++data;
UInt64 trace_id_higher_64 = unhexUInt<UInt64>(data);
UInt64 trace_id_lower_64 = unhexUInt<UInt64>(data + 16);
data += 32;
if (*data != '-')
{
error = fmt::format("Malformed traceparant header: {}", traceparent);
return false;
}
++data;
UInt64 span_id_64 = unhexUInt<UInt64>(data);
data += 16;
if (*data != '-')
{
error = fmt::format("Malformed traceparant header: {}", traceparent);
return false;
}
++data;
this->trace_flags = unhex2(data);
this->trace_id.toUnderType().items[0] = trace_id_higher_64;
this->trace_id.toUnderType().items[1] = trace_id_lower_64;
this->span_id = span_id_64;
return true;
}
/// Builds a version-00 `traceparent` header value from this context:
/// "00-<trace id as two 16-digit hex words>-<span id as 16 hex digits>-<flags as 2 hex digits>".
String TracingContext::composeTraceparentHeader() const
{
    const auto & trace_id_words = trace_id.toUnderType();

    // The cast is needed because fmt is being weird and complaining that
    // "mixing character types is not allowed".
    const auto flags = static_cast<uint8_t>(trace_flags);

    // This span is a parent for its children, so this span_id goes into the
    // parent-id position of the header.
    return fmt::format("00-{:016x}{:016x}-{:016x}-{:02x}", trace_id_words.items[0], trace_id_words.items[1], span_id, flags);
}
/// Gives read-only access to the tracing context of the calling thread.
const TracingContextOnThread & CurrentContext()
{
    const TracingContextOnThread & context_of_this_thread = current_thread_trace_context;
    return context_of_this_thread;
}
void TracingContextOnThread::reset() noexcept
{
this->trace_id = UUID();
this->span_id = 0;
this->trace_flags = TRACE_FLAG_NONE;
this->tracestate = "";
this->span_log.reset();
}
/// Sets up tracing on the current thread and opens a root span named `_operation_name`.
///
/// Three situations are handled:
///  1. A trace is already active on this thread: the holder does not take ownership of the thread context,
///     it only opens a span that is a child of the currently active one.
///  2. `_parent_trace_context` carries an enabled trace: the root span continues that trace.
///  3. Neither is the case: a new trace is started with the probability given by
///     `settings_ptr->opentelemetry_start_trace_probability`; with a null `settings_ptr` nothing is started.
///
/// @param _operation_name        Name of the root span.
/// @param _parent_trace_context  Trace context received from the caller; taken by value because it may be
///                               modified locally when a new trace is started.
/// @param settings_ptr           Settings used for the sampling decision; may be null.
/// @param _span_log              Span log that is installed into the thread context in cases 2 and 3.
///
/// Exceptions raised while the root span is being initialized are logged and tracing stays disabled.
TracingContextHolder::TracingContextHolder(
    std::string_view _operation_name,
    TracingContext _parent_trace_context,
    const Settings * settings_ptr,
    const std::weak_ptr<OpenTelemetrySpanLog> & _span_log)
{
    auto & thread_context = current_thread_trace_context;

    /// Fills the root span as a child of `source`: same trace id, `source`'s span as parent,
    /// a fresh random span id, the operation name and the start timestamp.
    const auto start_root_span = [&](const auto & source)
    {
        this->root_span.trace_id = source.trace_id;
        this->root_span.parent_span_id = source.span_id;
        this->root_span.span_id = thread_local_rng();
        this->root_span.operation_name = _operation_name;

        const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
        this->root_span.start_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
    };

    /// Use try-catch to make sure the ctor is exception safe.
    /// If any exception is raised during the construction, the tracing is not enabled on current thread.
    try
    {
        if (thread_context.isTraceEnabled())
        {
            /// Not the normal case: this object is being constructed somewhere other than the start of
            /// the thread's work. Usually that is caused by
            ///  1. bad design, or
            ///  2. a right design whose construction point stopped being the start of execution
            ///     on this thread after later code changes.
            ///
            /// The current context is then used as the parent of the new span, which makes it safe
            /// to instantiate this class several times on the same thread.
            this->is_context_owner = false;
            start_root_span(thread_context);

            /// Set the root span as parent of other spans created on current thread
            thread_context.span_id = this->root_span.span_id;
            return;
        }

        if (!_parent_trace_context.isTraceEnabled())
        {
            /// Without settings there is no sampling probability: skip tracing on this thread.
            if (settings_ptr == nullptr)
                return;

            // Start the trace with some configurable probability.
            std::bernoulli_distribution should_start_trace{settings_ptr->opentelemetry_start_trace_probability};
            if (!should_start_trace(thread_local_rng))
                return; /// not sampled: skip tracing on this thread

            // Keep drawing until the random trace_id is not 0, which is an invalid id.
            while (_parent_trace_context.trace_id == UUID())
            {
                auto & id_words = _parent_trace_context.trace_id.toUnderType();
                id_words.items[0] = thread_local_rng(); //-V656
                id_words.items[1] = thread_local_rng(); //-V656
            }

            /// A freshly started trace has no parent span.
            _parent_trace_context.span_id = 0;
        }

        start_root_span(_parent_trace_context);

        /// Add new initialization here
    }
    catch (...)
    {
        tryLogCurrentException(__FUNCTION__);

        /// Clear related fields to make sure the tracing is not enabled.
        this->root_span.trace_id = UUID();
        return;
    }

    /// Set up trace context on current thread only when the root span is successfully initialized.
    thread_context = _parent_trace_context;
    thread_context.span_id = this->root_span.span_id;
    thread_context.trace_flags = TRACE_FLAG_SAMPLED;
    thread_context.span_log = _span_log;
}
/// Closes the root span and undoes what the constructor did to the thread context.
///
/// When the span log is still alive, the root span gets a "clickhouse.thread_id" attribute and a finish
/// timestamp and is written to the log. Afterwards the thread context is either cleared completely
/// (this holder owns it) or switched back to the parent span (the holder was nested in an active trace).
/// Exceptions are not propagated; logging failures are reported through tryLogCurrentException().
TracingContextHolder::~TracingContextHolder()
{
    if (!this->root_span.isTraceEnabled())
        return;

    auto & thread_context = current_thread_trace_context;

    try
    {
        if (auto shared_span_log = thread_context.span_log.lock())
        {
            try
            {
                /// This object is created to initialize tracing context on a new thread,
                /// so the thread id is recorded to make thread switches visible in the span log.
                this->root_span.addAttribute("clickhouse.thread_id", getThreadId());
            }
            catch (...)
            {
                /// Missing the attribute is acceptable; the exception is ignored so that
                /// the span itself can still be logged.
            }

            const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
            this->root_span.finish_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();

            shared_span_log->add(OpenTelemetrySpanLogElement(this->root_span));
        }
    }
    catch (...)
    {
        tryLogCurrentException(__FUNCTION__);
    }

    this->root_span.trace_id = UUID();

    if (!this->is_context_owner)
    {
        /// Nested holder: hand the "current span" role back to the parent span.
        thread_context.span_id = this->root_span.parent_span_id;
    }
    else
    {
        /// Clear the context on current thread
        thread_context.reset();
    }
}
}
}