ClickHouse/src/Interpreters/OpenTelemetrySpanLog.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

88 lines
3.5 KiB
C++
Raw Normal View History

2023-05-08 03:15:28 +00:00
#include <Interpreters/OpenTelemetrySpanLog.h>
2020-08-20 20:59:40 +00:00
#include <base/getFQDNOrHostName.h>
2020-08-20 20:59:40 +00:00
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
2023-05-08 03:15:28 +00:00
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeMap.h>
2020-08-20 20:59:40 +00:00
#include <DataTypes/DataTypeUUID.h>
2023-03-12 13:30:29 +00:00
#include <DataTypes/DataTypeEnum.h>
2021-05-04 22:42:14 +00:00
2020-08-20 20:59:40 +00:00
namespace DB
{
/// Returns the names and types of the span log columns.
///
/// The order of the entries matters: `appendToBlock` fills the columns by
/// position, so both functions have to list the fields in the same order.
NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes()
{
    /// Enum8 whose names and numeric values follow the OpenTelemetry span kinds.
    auto span_kind_type = std::make_shared<DataTypeEnum8>(
        DataTypeEnum8::Values
        {
            {"INTERNAL", static_cast<Int8>(OpenTelemetry::INTERNAL)},
            {"SERVER", static_cast<Int8>(OpenTelemetry::SERVER)},
            {"CLIENT", static_cast<Int8>(OpenTelemetry::CLIENT)},
            {"PRODUCER", static_cast<Int8>(OpenTelemetry::PRODUCER)},
            {"CONSUMER", static_cast<Int8>(OpenTelemetry::CONSUMER)}
        }
    );

    /// Shared by `hostname`, `operation_name` and the keys of the `attribute` map.
    auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());

    return {
        {"hostname", low_cardinality_string},
        {"trace_id", std::make_shared<DataTypeUUID>()},
        {"span_id", std::make_shared<DataTypeUInt64>()},
        {"parent_span_id", std::make_shared<DataTypeUInt64>()},
        {"operation_name", low_cardinality_string},
        {"kind", std::move(span_kind_type)},
        // DateTime64 is really unwieldy -- there is no "normal" way to convert
        // it to an UInt64 count of microseconds, except:
        // 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just
        // doesn't look sane;
        // 2) things like toUInt64(toDecimal64(date, 6) * 1000000) that are also
        // excessively verbose -- why do I have to write scale '6' again, and
        // write out 6 zeros? -- and also don't work because of overflow.
        // Also subtraction of two DateTime64 points doesn't work, so you can't
        // get duration.
        // It is much less hassle to just use UInt64 of microseconds.
        {"start_time_us", std::make_shared<DataTypeUInt64>()},
        {"finish_time_us", std::make_shared<DataTypeUInt64>()},
        {"finish_date", std::make_shared<DataTypeDate>()},
        {"attribute", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeString>())},
    };
}
/// Returns the alias columns of the span log.
///
/// `attribute.names` and `attribute.values` are defined as `mapKeys(attribute)`
/// and `mapValues(attribute)`, i.e. they expose the keys and the values of the
/// `attribute` map as two arrays.
NamesAndAliases OpenTelemetrySpanLogElement::getNamesAndAliases()
{
    /// Same key type as the one used for the `attribute` map itself.
    auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());

    return
    {
        {"attribute.names", std::make_shared<DataTypeArray>(low_cardinality_string), "mapKeys(attribute)"},
        {"attribute.values", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "mapValues(attribute)"}
    };
}
2020-11-19 15:52:11 +00:00
2020-08-20 20:59:40 +00:00
/// Appends this span to `columns` as one row.
///
/// Exactly one value is inserted into each column, by position, in the same
/// order in which `getNamesAndTypes` declares the columns.
void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const
{
    size_t i = 0;

    columns[i++]->insert(getFQDNOrHostName());
    columns[i++]->insert(trace_id);
    columns[i++]->insert(span_id);
    columns[i++]->insert(parent_span_id);
    columns[i++]->insert(operation_name);
    columns[i++]->insert(kind);
    columns[i++]->insert(start_time_us);
    columns[i++]->insert(finish_time_us);

    /// `finish_date` is derived from the finish time: microseconds are reduced
    /// to whole seconds before being mapped to a day number.
    columns[i++]->insert(DateLUT::instance().toDayNum(finish_time_us / 1000000).toUnderType());

    // The user might add some int values, and we would then have an Int Field,
    // and the insert would fail because the column requires Strings.
    // NOTE(review): no conversion is performed in this function; presumably the
    // attribute values are already converted to strings when they are added to
    // `attributes` -- confirm against the code that fills it.
    columns[i++]->insert(attributes);
}
}