mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
cleanup
This commit is contained in:
parent
84908df6d8
commit
145e2b012f
@ -1,9 +1,17 @@
|
||||
# [draft] OpenTelemetry support
|
||||
---
|
||||
toc_priority: 62
|
||||
toc_title: [experimental] OpenTelemetry
|
||||
---
|
||||
|
||||
# [experimental] OpenTelemetry Support
|
||||
|
||||
[OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting
|
||||
traces and metrics from distributed applications. ClickHouse has some support
|
||||
for OpenTelemetry.
|
||||
|
||||
!!! warning "Warning"
|
||||
This is an experimental feature that will change in backwards-incompatible ways in future releases.
|
||||
|
||||
|
||||
## Supplying Trace Context to ClickHouse
|
||||
|
||||
@ -40,9 +48,9 @@ a dependency on a particular monitoring system, instead only
|
||||
providing the tracing data conforming to the standard. A natural way to do so
|
||||
in an SQL RDBMS is a system table. OpenTelemetry trace span information
|
||||
[required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span)
|
||||
is stored in the system table called `system.opentelemetry_log`.
|
||||
is stored in the system table called `system.opentelemetry_span_log`.
|
||||
|
||||
The table must be enabled in the server configuration, see the `opentelemetry_log`
|
||||
The table must be enabled in the server configuration, see the `opentelemetry_span_log`
|
||||
element in the default config file `config.xml`. It is enabled by default.
|
||||
|
||||
The table has the following columns:
|
@ -597,7 +597,7 @@
|
||||
<!--
|
||||
OpenTelemetry log contains OpenTelemetry trace spans.
|
||||
-->
|
||||
<opentelemetry_log>
|
||||
<opentelemetry_span_log>
|
||||
<!--
|
||||
The default table creation code is insufficient, this <engine> spec
|
||||
is a workaround. There is no 'event_time' for this log, but two times,
|
||||
@ -614,9 +614,9 @@
|
||||
order by (finish_date, finish_time_us, trace_id)
|
||||
</engine>
|
||||
<database>system</database>
|
||||
<table>opentelemetry_log</table>
|
||||
<table>opentelemetry_span_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</opentelemetry_log>
|
||||
</opentelemetry_span_log>
|
||||
|
||||
|
||||
<!-- Crash log. Stores stack traces for fatal errors.
|
||||
|
@ -133,8 +133,6 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision)
|
||||
client_version_patch = client_tcp_protocol_version;
|
||||
}
|
||||
|
||||
// TODO what does it even mean to read this structure over HTTP? I thought
|
||||
// this was for native protocol? See interface == Interface::HTTP.
|
||||
if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY)
|
||||
{
|
||||
uint8_t have_trace_id = 0;
|
||||
@ -145,10 +143,6 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision)
|
||||
readBinary(opentelemetry_span_id, in);
|
||||
readBinary(opentelemetry_tracestate, in);
|
||||
readBinary(opentelemetry_trace_flags, in);
|
||||
|
||||
fmt::print(stderr, "read {:x}, {}, {} at\n{}\n",
|
||||
opentelemetry_trace_id, opentelemetry_span_id,
|
||||
opentelemetry_parent_span_id, StackTrace().toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,15 +59,14 @@ public:
|
||||
String initial_query_id;
|
||||
Poco::Net::SocketAddress initial_address;
|
||||
|
||||
// OpenTelemetry things
|
||||
// OpenTelemetry trace information.
|
||||
__uint128_t opentelemetry_trace_id = 0;
|
||||
// Span ID is not strictly the client info, but convenient to keep here.
|
||||
// The span id we get in the incoming client info becomes our parent span
|
||||
// id, and the span id we send becomes downstream parent span id.
|
||||
UInt64 opentelemetry_span_id = 0;
|
||||
UInt64 opentelemetry_parent_span_id = 0;
|
||||
// the incoming tracestate header, we just pass it downstream.
|
||||
// https://www.w3.org/TR/trace-context/
|
||||
// The incoming tracestate header and the trace flags, we just pass them downstream.
|
||||
// They are described at https://www.w3.org/TR/trace-context/
|
||||
String opentelemetry_tracestate;
|
||||
UInt8 opentelemetry_trace_flags = 0;
|
||||
|
||||
|
@ -1791,14 +1791,14 @@ std::shared_ptr<AsynchronousMetricLog> Context::getAsynchronousMetricLog()
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<OpenTelemetrySpanLog> Context::getOpenTelemetryLog()
|
||||
std::shared_ptr<OpenTelemetrySpanLog> Context::getOpenTelemetrySpanLog()
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (!shared->system_logs)
|
||||
return {};
|
||||
|
||||
return shared->system_logs->opentelemetry_log;
|
||||
return shared->system_logs->opentelemetry_span_log;
|
||||
}
|
||||
|
||||
|
||||
|
@ -543,7 +543,7 @@ public:
|
||||
std::shared_ptr<TextLog> getTextLog();
|
||||
std::shared_ptr<MetricLog> getMetricLog();
|
||||
std::shared_ptr<AsynchronousMetricLog> getAsynchronousMetricLog();
|
||||
std::shared_ptr<OpenTelemetrySpanLog> getOpenTelemetryLog();
|
||||
std::shared_ptr<OpenTelemetrySpanLog> getOpenTelemetrySpanLog();
|
||||
|
||||
/// Returns an object used to log operations with parts if it is possible.
|
||||
/// Provide table name to make required checks.
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include <Interpreters/TextLog.h>
|
||||
#include <Interpreters/MetricLog.h>
|
||||
#include <Interpreters/AsynchronousMetricLog.h>
|
||||
#include <Interpreters/OpenTelemetryLog.h>
|
||||
#include <Interpreters/OpenTelemetrySpanLog.h>
|
||||
#include <Access/ContextAccess.h>
|
||||
#include <Access/AllowedClientHosts.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
@ -323,7 +323,7 @@ BlockIO InterpreterSystemQuery::execute()
|
||||
[&] () { if (auto text_log = context.getTextLog()) text_log->flush(true); },
|
||||
[&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(true); },
|
||||
[&] () { if (auto asynchronous_metric_log = context.getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); },
|
||||
[&] () { if (auto opentelemetry_log = context.getOpenTelemetryLog()) opentelemetry_log->flush(true); }
|
||||
[&] () { if (auto opentelemetry_span_log = context.getOpenTelemetrySpanLog()) opentelemetry_span_log->flush(true); }
|
||||
);
|
||||
break;
|
||||
case Type::STOP_LISTEN_QUERIES:
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include "OpenTelemetryLog.h"
|
||||
#include "OpenTelemetrySpanLog.h"
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
@ -5,22 +5,6 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/*
|
||||
struct OpenTelemetrySpanContext
|
||||
{
|
||||
UInt128 trace_id;
|
||||
UInt64 span_id;
|
||||
UInt8 trace_flags;
|
||||
String trace_state;
|
||||
};
|
||||
*/
|
||||
|
||||
// using TimeMicroseconds = std::chrono::time_point<
|
||||
// std::chrono::local_t,
|
||||
// std::chrono::duration<UInt64, std::micro>>;
|
||||
|
||||
// TODO figure out precisely which part of this is run time, and which part we
|
||||
// must log.
|
||||
struct OpenTelemetrySpan
|
||||
{
|
||||
__uint128_t trace_id;
|
||||
@ -35,13 +19,6 @@ struct OpenTelemetrySpan
|
||||
// I don't understand how Links work, namely, which direction should they
|
||||
// point to, and how they are related with parent_span_id, so no Links for
|
||||
// now.
|
||||
|
||||
// The following fields look like something that is runtime only and doesn't
|
||||
// require logging.
|
||||
UInt8 trace_flags;
|
||||
// Vendor-specific info, key-value pairs. Keep it as a string as described
|
||||
// here: https://w3c.github.io/trace-context/#tracestate-header
|
||||
String trace_state;
|
||||
};
|
||||
|
||||
struct OpenTelemetrySpanLogElement : public OpenTelemetrySpan
|
@ -7,7 +7,7 @@
|
||||
#include <Interpreters/CrashLog.h>
|
||||
#include <Interpreters/MetricLog.h>
|
||||
#include <Interpreters/AsynchronousMetricLog.h>
|
||||
#include <Interpreters/OpenTelemetryLog.h>
|
||||
#include <Interpreters/OpenTelemetrySpanLog.h>
|
||||
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <common/logger_useful.h>
|
||||
@ -88,9 +88,9 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi
|
||||
asynchronous_metric_log = createSystemLog<AsynchronousMetricLog>(
|
||||
global_context, "system", "asynchronous_metric_log", config,
|
||||
"asynchronous_metric_log");
|
||||
opentelemetry_log = createSystemLog<OpenTelemetrySpanLog>(
|
||||
global_context, "system", "opentelemetry_log", config,
|
||||
"opentelemetry_log");
|
||||
opentelemetry_span_log = createSystemLog<OpenTelemetrySpanLog>(
|
||||
global_context, "system", "opentelemetry_span_log", config,
|
||||
"opentelemetry_span_log");
|
||||
|
||||
if (query_log)
|
||||
logs.emplace_back(query_log.get());
|
||||
@ -108,8 +108,8 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi
|
||||
logs.emplace_back(metric_log.get());
|
||||
if (asynchronous_metric_log)
|
||||
logs.emplace_back(asynchronous_metric_log.get());
|
||||
if (opentelemetry_log)
|
||||
logs.emplace_back(opentelemetry_log.get());
|
||||
if (opentelemetry_span_log)
|
||||
logs.emplace_back(opentelemetry_span_log.get());
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -106,8 +106,8 @@ struct SystemLogs
|
||||
std::shared_ptr<MetricLog> metric_log; /// Used to log all metrics.
|
||||
/// Metrics from system.asynchronous_metrics.
|
||||
std::shared_ptr<AsynchronousMetricLog> asynchronous_metric_log;
|
||||
/// OpenTelemetry trace spans
|
||||
std::shared_ptr<OpenTelemetrySpanLog> opentelemetry_log;
|
||||
/// OpenTelemetry trace spans.
|
||||
std::shared_ptr<OpenTelemetrySpanLog> opentelemetry_span_log;
|
||||
|
||||
std::vector<ISystemLog *> logs;
|
||||
};
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include <Access/EnabledQuota.h>
|
||||
#include <Interpreters/InterpreterFactory.h>
|
||||
#include <Interpreters/ProcessList.h>
|
||||
#include <Interpreters/OpenTelemetryLog.h>
|
||||
#include <Interpreters/OpenTelemetrySpanLog.h>
|
||||
#include <Interpreters/QueryLog.h>
|
||||
#include <Interpreters/InterpreterSetQuery.h>
|
||||
#include <Interpreters/ApplyWithGlobalVisitor.h>
|
||||
@ -245,9 +245,9 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c
|
||||
if (auto query_log = context.getQueryLog())
|
||||
query_log->add(elem);
|
||||
|
||||
if (auto opentelemetry_log = context.getOpenTelemetryLog();
|
||||
if (auto opentelemetry_span_log = context.getOpenTelemetrySpanLog();
|
||||
context.getClientInfo().opentelemetry_trace_id
|
||||
&& opentelemetry_log)
|
||||
&& opentelemetry_span_log)
|
||||
{
|
||||
OpenTelemetrySpanLogElement span;
|
||||
span.trace_id = context.getClientInfo().opentelemetry_trace_id;
|
||||
@ -275,7 +275,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c
|
||||
context.getClientInfo().opentelemetry_tracestate);
|
||||
}
|
||||
|
||||
opentelemetry_log->add(span);
|
||||
opentelemetry_span_log->add(span);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::FailedQuery);
|
||||
@ -663,9 +663,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
query_log->add(elem);
|
||||
}
|
||||
|
||||
if (auto opentelemetry_log = context.getOpenTelemetryLog();
|
||||
if (auto opentelemetry_span_log = context.getOpenTelemetrySpanLog();
|
||||
context.getClientInfo().opentelemetry_trace_id
|
||||
&& opentelemetry_log)
|
||||
&& opentelemetry_span_log)
|
||||
{
|
||||
OpenTelemetrySpanLogElement span;
|
||||
span.trace_id = context.getClientInfo().opentelemetry_trace_id;
|
||||
@ -692,7 +692,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
context.getClientInfo().opentelemetry_tracestate);
|
||||
}
|
||||
|
||||
opentelemetry_log->add(span);
|
||||
opentelemetry_span_log->add(span);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -119,7 +119,7 @@ SRCS(
|
||||
MutationsInterpreter.cpp
|
||||
MySQL/InterpretersMySQLDDLQuery.cpp
|
||||
NullableUtils.cpp
|
||||
OpenTelemetryLog.cpp
|
||||
OpenTelemetrySpanLog.cpp
|
||||
OptimizeIfChains.cpp
|
||||
OptimizeIfWithConstantConditionVisitor.cpp
|
||||
PartLog.cpp
|
||||
|
@ -15,7 +15,7 @@ select count(*) "'"'"total spans"'"'",
|
||||
uniqExact(span_id) "'"'"unique spans"'"'",
|
||||
uniqExactIf(parent_span_id, parent_span_id != 0)
|
||||
"'"'"unique non-zero parent spans"'"'"
|
||||
from system.opentelemetry_log
|
||||
from system.opentelemetry_span_log
|
||||
where trace_id = reinterpretAsUUID(reverse(unhex('$trace_id')))
|
||||
and operation_name = 'query'
|
||||
;
|
||||
@ -24,7 +24,7 @@ select count(*) "'"'"total spans"'"'",
|
||||
select count(*) "'"'"initial query spans with proper parent"'"'"
|
||||
from
|
||||
(select *, attribute_name, attribute_value
|
||||
from system.opentelemetry_log
|
||||
from system.opentelemetry_span_log
|
||||
array join attribute.names as attribute_name,
|
||||
attribute.values as attribute_value) o
|
||||
join system.query_log on query_id = o.attribute_value
|
||||
@ -39,7 +39,7 @@ select count(*) "'"'"initial query spans with proper parent"'"'"
|
||||
-- Check that the tracestate header was propagated. It must have exactly the
|
||||
-- same non-empty value for all 'query' spans in this trace.
|
||||
select uniqExact(value) "'"'"unique non-empty tracestate values"'"'"
|
||||
from system.opentelemetry_log
|
||||
from system.opentelemetry_span_log
|
||||
array join attribute.names as name, attribute.values as value
|
||||
where
|
||||
trace_id = reinterpretAsUUID(reverse(unhex('$trace_id')))
|
||||
@ -106,7 +106,7 @@ ${CLICKHOUSE_CLIENT} -q "
|
||||
with count(*) as c
|
||||
-- expect 200 * 0.1 = 20 sampled events on average
|
||||
select if(c > 5 and c < 35, 'OK', 'fail: ' || toString(c))
|
||||
from system.opentelemetry_log
|
||||
from system.opentelemetry_span_log
|
||||
array join attribute.names as name, attribute.values as value
|
||||
where name = 'clickhouse.query_id'
|
||||
and operation_name = 'query'
|
||||
|
Loading…
Reference in New Issue
Block a user