ClickHouse/src/Interpreters/OpenTelemetrySpanLog.cpp

276 lines
8.8 KiB
C++
Raw Normal View History

2020-10-22 16:47:20 +00:00
#include "OpenTelemetrySpanLog.h"
2020-08-20 20:59:40 +00:00
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
2020-08-20 20:59:40 +00:00
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeMap.h>
2020-08-20 20:59:40 +00:00
#include <DataTypes/DataTypeUUID.h>
#include <Interpreters/Context.h>
2020-08-20 20:59:40 +00:00
2021-05-04 22:42:14 +00:00
#include <Common/hex.h>
#include <Common/CurrentThread.h>
2021-05-04 22:42:14 +00:00
2020-08-20 20:59:40 +00:00
namespace DB
{
// Returns the names and types of the span log columns, listed in the same
// order in which appendToBlock() inserts the values.
NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes()
{
    // Small factories so that every column still gets its own type object.
    const auto uint64_type = [] { return std::make_shared<DataTypeUInt64>(); };
    const auto string_type = [] { return std::make_shared<DataTypeString>(); };

    NamesAndTypesList columns
    {
        {"trace_id", std::make_shared<DataTypeUUID>()},
        {"span_id", uint64_type()},
        {"parent_span_id", uint64_type()},
        {"operation_name", string_type()},
        // The timestamps are stored as a plain UInt64 number of microseconds
        // instead of DateTime64, because DateTime64 is unwieldy here. There is
        // no "normal" way to convert it to a UInt64 count of microseconds,
        // except:
        // 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just
        //    doesn't look sane;
        // 2) things like toUInt64(toDecimal64(date, 6) * 1000000), which are
        //    excessively verbose (the scale '6' and the six zeros have to be
        //    written out again) and also don't work because of overflow.
        // Subtraction of two DateTime64 points doesn't work either, so a
        // duration cannot be computed from them.
        {"start_time_us", uint64_type()},
        {"finish_time_us", uint64_type()},
        {"finish_date", std::make_shared<DataTypeDate>()},
        {"attribute", std::make_shared<DataTypeMap>(string_type(), string_type())},
    };

    return columns;
}
// Returns the alias columns of the span log: `attribute.names` and
// `attribute.values`, both Array(String), defined through the expressions
// mapKeys(attribute) and mapValues(attribute) respectively.
NamesAndAliases OpenTelemetrySpanLogElement::getNamesAndAliases()
{
    const auto array_of_strings = []
    {
        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
    };

    return
    {
        {"attribute.names", array_of_strings(), "mapKeys(attribute)"},
        {"attribute.values", array_of_strings(), "mapValues(attribute)"},
    };
}
2020-11-19 15:52:11 +00:00
2020-08-20 20:59:40 +00:00
// Appends this span as one row: one value is inserted into each of the
// columns, starting from columns[0], in the order declared by
// getNamesAndTypes().
void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const
{
    size_t column_idx = 0;
    const auto append = [&](const auto & value) { columns[column_idx++]->insert(value); };

    append(trace_id);
    append(span_id);
    append(parent_span_id);
    append(operation_name);
    append(start_time_us);
    append(finish_time_us);

    // finish_date is derived from the finish time, which is in microseconds
    // while the date LUT works with seconds.
    const auto finish_day = DateLUT::instance().toDayNum(finish_time_us / 1000000).toUnderType();
    append(finish_day);

    // The attribute values may have been added as non-string fields (e.g.
    // integers), and inserting those would fail because the column requires
    // Strings. Convert every value to a string here, so that the other places
    // that add attributes do not have to remember to do it.
    Map attributes(attribute_names.size());
    for (size_t pos = 0; pos < attributes.size(); ++pos)
        attributes[pos] = Tuple{attribute_names[pos], toString(attribute_values[pos])};

    append(attributes);
}
2020-11-19 15:52:11 +00:00
// Starts a new span named `_operation_name` as a child of the span that is
// current for this thread, and makes the new span the current one.
// The holder is left with a zero trace_id (and does nothing else) when there
// is no thread context or when the thread has no trace id.
OpenTelemetrySpanHolder::OpenTelemetrySpanHolder(const std::string & _operation_name)
{
    trace_id = 0;

    // There may be no thread context if we're running inside the
    // clickhouse-client, e.g. reading an external table provided with the
    // `--external` option.
    if (!CurrentThread::isInitialized())
        return;

    auto & current_trace_context = CurrentThread::get().thread_trace_context;

    trace_id = current_trace_context.trace_id;
    if (trace_id == UUID())
        return;

    parent_span_id = current_trace_context.span_id;
    span_id = thread_local_rng();
    operation_name = _operation_name;

    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    start_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();

    // From now on this span is the current span of the thread.
    current_trace_context.span_id = span_id;
}
2020-11-19 15:52:11 +00:00
// Finishes the span: restores the parent span as the current span of the
// thread and, if a span log can be reached through the query context of the
// thread group, records the finish time and writes the span to that log.
// Never throws: any exception is logged and swallowed.
OpenTelemetrySpanHolder::~OpenTelemetrySpanHolder()
{
    try
    {
        if (trace_id == UUID())
            return;

        // First of all, return the old value of the current span. This must
        // happen even if the span cannot be logged below.
        auto & current_trace_context = CurrentThread::get().thread_trace_context;
        assert(current_trace_context.span_id == span_id);
        current_trace_context.span_id = parent_span_id;

        // Not sure what's the best way to access the log from here.
        auto * group = CurrentThread::getGroup().get();

        // Not sure whether and when this can be null.
        if (!group)
            return;

        ContextPtr query_context;
        {
            std::lock_guard guard(group->mutex);
            query_context = group->query_context.lock();
        }

        // Both global and query contexts can be null when executing a
        // background task, and global context can be null for some queries.
        if (!query_context)
            return;

        auto span_log = query_context->getOpenTelemetrySpanLog();

        // The log might be disabled.
        if (!span_log)
            return;

        const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
        finish_time_us = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();

        span_log->add(OpenTelemetrySpanLogElement(static_cast<OpenTelemetrySpan>(*this)));
    }
    catch (...)
    {
        tryLogCurrentException(__FUNCTION__);
    }
}
void OpenTelemetrySpanHolder::addAttribute(const std::string& name, UInt64 value)
{
if (trace_id == UUID())
return;
this->attribute_names.push_back(name);
this->attribute_values.push_back(std::to_string(value));
2022-06-20 09:53:35 +00:00
assert(this->attribute_names.size() == this->attribute_values.size());
}
void OpenTelemetrySpanHolder::addAttribute(const std::string& name, const std::string& value)
{
if (trace_id == UUID())
return;
this->attribute_names.push_back(name);
this->attribute_values.push_back(value);
2022-06-20 09:53:35 +00:00
assert(this->attribute_names.size() == this->attribute_values.size());
}
void OpenTelemetrySpanHolder::addAttribute(const Exception & e)
{
if (trace_id == UUID())
return;
this->attribute_names.push_back("clickhouse.exception");
this->attribute_values.push_back(getExceptionMessage(e, false));
2022-06-20 09:53:35 +00:00
assert(this->attribute_names.size() == this->attribute_values.size());
}
void OpenTelemetrySpanHolder::addAttribute(std::exception_ptr e)
{
if (trace_id == UUID() || e == nullptr)
return;
this->attribute_names.push_back("clickhouse.exception");
this->attribute_values.push_back(getExceptionMessage(e, false));
2022-06-20 09:53:35 +00:00
assert(this->attribute_names.size() == this->attribute_values.size());
}
2020-11-19 15:52:11 +00:00
bool OpenTelemetryTraceContext::parseTraceparentHeader(const std::string & traceparent,
std::string & error)
{
trace_id = 0;
// Version 00, which is the only one we can parse, is fixed width. Use this
// fact for an additional sanity check.
2021-05-04 22:42:14 +00:00
const int expected_length = strlen("xx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxx-xx");
2020-11-19 15:52:11 +00:00
if (traceparent.length() != expected_length)
{
error = fmt::format("unexpected length {}, expected {}",
traceparent.length(), expected_length);
return false;
}
2021-05-04 22:42:14 +00:00
const char * data = traceparent.data();
uint8_t version = unhex2(data);
2021-05-04 22:42:14 +00:00
data += 2;
if (version != 0)
2020-11-19 15:52:11 +00:00
{
2021-05-04 22:42:14 +00:00
error = fmt::format("unexpected version {}, expected 00", version);
2020-11-19 15:52:11 +00:00
return false;
}
2021-05-04 22:42:14 +00:00
if (*data != '-')
2020-11-19 15:52:11 +00:00
{
2021-05-04 22:42:14 +00:00
error = fmt::format("Malformed traceparant header: {}", traceparent);
2020-11-19 15:52:11 +00:00
return false;
}
2021-05-04 22:42:14 +00:00
++data;
UInt64 trace_id_higher_64 = unhexUInt<UInt64>(data);
UInt64 trace_id_lower_64 = unhexUInt<UInt64>(data + 16);
2021-05-04 22:42:14 +00:00
data += 32;
if (*data != '-')
2020-11-19 15:52:11 +00:00
{
2021-05-04 22:42:14 +00:00
error = fmt::format("Malformed traceparant header: {}", traceparent);
return false;
}
++data;
UInt64 span_id_64 = unhexUInt<UInt64>(data);
2021-05-04 22:42:14 +00:00
data += 16;
if (*data != '-')
{
error = fmt::format("Malformed traceparant header: {}", traceparent);
2020-11-19 15:52:11 +00:00
return false;
}
2021-05-04 22:42:14 +00:00
++data;
this->trace_flags = unhex2(data);
// store the 128-bit trace id in big-endian order
this->trace_id.toUnderType().items[0] = trace_id_higher_64;
this->trace_id.toUnderType().items[1] = trace_id_lower_64;
this->span_id = span_id_64;
2020-11-19 15:52:11 +00:00
return true;
}
// Builds a version-00 `traceparent` header value from this context:
// "00-<trace id, 32 hex digits>-<span id, 16 hex digits>-<flags, 2 hex digits>".
std::string OpenTelemetryTraceContext::composeTraceparentHeader() const
{
    // The two 64-bit halves of the trace id are printed in the order
    // items[0], items[1], i.e. the trace id goes out in network byte order.
    const auto & trace_id_halves = trace_id.toUnderType().items;

    // This cast is needed because fmt is being weird and complaining that
    // "mixing character types is not allowed".
    const auto flags = static_cast<uint8_t>(trace_flags);

    // This span is a parent for its children, so we specify this span_id as a
    // parent id.
    return fmt::format("00-{:016x}{:016x}-{:016x}-{:02x}",
        trace_id_halves[0],
        trace_id_halves[1],
        span_id,
        flags);
}
2020-08-20 20:59:40 +00:00
}