mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge pull request #47499 from FrankChen021/span_kind
This commit is contained in:
commit
9cb018545c
@ -15,6 +15,13 @@ Columns:
|
||||
|
||||
- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
|
||||
|
||||
- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span.
|
||||
- `INTERNAL` — Indicates that the span represents an internal operation within an application.
|
||||
- `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request.
|
||||
- `CLIENT` — Indicates that the span describes a request to some remote service.
|
||||
- `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts.
|
||||
- `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request.
|
||||
|
||||
- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds).
|
||||
|
||||
- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds).
|
||||
@ -42,6 +49,7 @@ trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914
|
||||
span_id: 701487461015578150
|
||||
parent_span_id: 2991972114672045096
|
||||
operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
|
||||
kind: INTERNAL
|
||||
start_time_us: 1612374594529090
|
||||
finish_time_us: 1612374594529108
|
||||
finish_date: 2021-02-03
|
||||
|
@ -506,7 +506,7 @@ void Connection::sendQuery(
|
||||
bool with_pending_data,
|
||||
std::function<void(const Progress &)>)
|
||||
{
|
||||
OpenTelemetry::SpanHolder span("Connection::sendQuery()");
|
||||
OpenTelemetry::SpanHolder span("Connection::sendQuery()", OpenTelemetry::CLIENT);
|
||||
span.addAttribute("clickhouse.query_id", query_id_);
|
||||
span.addAttribute("clickhouse.query", query);
|
||||
span.addAttribute("target", [this] () { return this->getHost() + ":" + std::to_string(this->getPort()); });
|
||||
|
@ -92,7 +92,7 @@ bool Span::addAttributeImpl(std::string_view name, std::string_view value) noexc
|
||||
return true;
|
||||
}
|
||||
|
||||
SpanHolder::SpanHolder(std::string_view _operation_name)
|
||||
SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
|
||||
{
|
||||
if (!current_thread_trace_context.isTraceEnabled())
|
||||
{
|
||||
@ -106,6 +106,7 @@ SpanHolder::SpanHolder(std::string_view _operation_name)
|
||||
this->parent_span_id = current_thread_trace_context.span_id;
|
||||
this->span_id = thread_local_rng(); // create a new id for this span
|
||||
this->operation_name = _operation_name;
|
||||
this->kind = _kind;
|
||||
this->start_time_us
|
||||
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
|
||||
|
@ -13,6 +13,29 @@ class ReadBuffer;
|
||||
namespace OpenTelemetry
|
||||
{
|
||||
|
||||
/// See https://opentelemetry.io/docs/reference/specification/trace/api/#spankind
|
||||
enum SpanKind
|
||||
{
|
||||
/// Default value. Indicates that the span represents an internal operation within an application,
|
||||
/// as opposed to an operations with remote parents or children.
|
||||
INTERNAL = 0,
|
||||
|
||||
/// Indicates that the span covers server-side handling of a synchronous RPC or other remote request.
|
||||
/// This span is often the child of a remote CLIENT span that was expected to wait for a response.
|
||||
SERVER = 1,
|
||||
|
||||
/// Indicates that the span describes a request to some remote service.
|
||||
/// This span is usually the parent of a remote SERVER span and does not end until the response is received.
|
||||
CLIENT = 2,
|
||||
|
||||
/// Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts.
|
||||
/// In messaging scenarios with batching, tracing individual messages requires a new PRODUCER span per message to be created.
|
||||
PRODUCER = 3,
|
||||
|
||||
/// Indicates that the span describes a child of an asynchronous PRODUCER request
|
||||
CONSUMER = 4
|
||||
};
|
||||
|
||||
struct Span
|
||||
{
|
||||
UUID trace_id{};
|
||||
@ -21,6 +44,7 @@ struct Span
|
||||
String operation_name;
|
||||
UInt64 start_time_us = 0;
|
||||
UInt64 finish_time_us = 0;
|
||||
SpanKind kind = INTERNAL;
|
||||
Map attributes;
|
||||
|
||||
/// Following methods are declared as noexcept to make sure they're exception safe.
|
||||
@ -155,7 +179,7 @@ using TracingContextHolderPtr = std::unique_ptr<TracingContextHolder>;
|
||||
/// Once it's created or destructed, it automatically maitains the tracing context on the thread that it lives.
|
||||
struct SpanHolder : public Span
|
||||
{
|
||||
SpanHolder(std::string_view);
|
||||
SpanHolder(std::string_view, SpanKind _kind = INTERNAL);
|
||||
~SpanHolder();
|
||||
|
||||
/// Finish a span explicitly if needed.
|
||||
|
@ -542,6 +542,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ ,
|
||||
task.entry.tracing_context,
|
||||
this->context->getOpenTelemetrySpanLog());
|
||||
tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER;
|
||||
|
||||
String active_node_path = task.getActiveNodePath();
|
||||
String finished_node_path = task.getFinishedNodePath();
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <base/hex.h>
|
||||
@ -20,11 +21,23 @@ namespace DB
|
||||
|
||||
NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes()
|
||||
{
|
||||
auto span_kind_type = std::make_shared<DataTypeEnum8>(
|
||||
DataTypeEnum8::Values
|
||||
{
|
||||
{"INTERNAL", static_cast<Int8>(OpenTelemetry::INTERNAL)},
|
||||
{"SERVER", static_cast<Int8>(OpenTelemetry::SERVER)},
|
||||
{"CLIENT", static_cast<Int8>(OpenTelemetry::CLIENT)},
|
||||
{"PRODUCER", static_cast<Int8>(OpenTelemetry::PRODUCER)},
|
||||
{"CONSUMER", static_cast<Int8>(OpenTelemetry::CONSUMER)}
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
{"trace_id", std::make_shared<DataTypeUUID>()},
|
||||
{"span_id", std::make_shared<DataTypeUInt64>()},
|
||||
{"parent_span_id", std::make_shared<DataTypeUInt64>()},
|
||||
{"operation_name", std::make_shared<DataTypeString>()},
|
||||
{"kind", std::move(span_kind_type)},
|
||||
// DateTime64 is really unwieldy -- there is no "normal" way to convert
|
||||
// it to an UInt64 count of microseconds, except:
|
||||
// 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just
|
||||
@ -59,6 +72,7 @@ void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const
|
||||
columns[i++]->insert(span_id);
|
||||
columns[i++]->insert(parent_span_id);
|
||||
columns[i++]->insert(operation_name);
|
||||
columns[i++]->insert(kind);
|
||||
columns[i++]->insert(start_time_us);
|
||||
columns[i++]->insert(finish_time_us);
|
||||
columns[i++]->insert(DateLUT::instance().toDayNum(finish_time_us / 1000000).toUnderType());
|
||||
|
@ -55,7 +55,7 @@ bool isSupportedAlterType(int type)
|
||||
|
||||
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params)
|
||||
{
|
||||
OpenTelemetry::SpanHolder span(__FUNCTION__);
|
||||
OpenTelemetry::SpanHolder span(__FUNCTION__, OpenTelemetry::PRODUCER);
|
||||
|
||||
if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported");
|
||||
|
@ -843,6 +843,7 @@ namespace
|
||||
query_context->getClientInfo().client_trace_context,
|
||||
query_context->getSettingsRef(),
|
||||
query_context->getOpenTelemetrySpanLog());
|
||||
thread_trace_context->root_span.kind = OpenTelemetry::SERVER;
|
||||
|
||||
/// Prepare for sending exceptions and logs.
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
|
@ -1001,6 +1001,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
client_info.client_trace_context,
|
||||
context->getSettingsRef(),
|
||||
context->getOpenTelemetrySpanLog());
|
||||
thread_trace_context->root_span.kind = OpenTelemetry::SERVER;
|
||||
thread_trace_context->root_span.addAttribute("clickhouse.uri", request.getURI());
|
||||
|
||||
response.setContentType("text/plain; charset=UTF-8");
|
||||
|
@ -277,6 +277,7 @@ void TCPHandler::runImpl()
|
||||
query_context->getClientInfo().client_trace_context,
|
||||
query_context->getSettingsRef(),
|
||||
query_context->getOpenTelemetrySpanLog());
|
||||
thread_trace_context->root_span.kind = OpenTelemetry::SERVER;
|
||||
|
||||
query_scope.emplace(query_context, /* fatal_error_callback */ [this]
|
||||
{
|
||||
|
@ -1,8 +1,18 @@
|
||||
===1===
|
||||
{"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"}
|
||||
{"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"}
|
||||
1
|
||||
===2===
|
||||
{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"}
|
||||
{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"}
|
||||
3
|
||||
2
|
||||
===3===
|
||||
{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"}
|
||||
{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"}
|
||||
1
|
||||
===4===
|
||||
{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"}
|
||||
{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"}
|
||||
3
|
||||
2
|
||||
|
@ -42,6 +42,22 @@ ${CLICKHOUSE_CLIENT} -nq "
|
||||
;"
|
||||
}
|
||||
|
||||
#
|
||||
# $1 - OpenTelemetry Trace Id
|
||||
# $2 - value of insert_distributed_sync
|
||||
function check_span_kind()
|
||||
{
|
||||
${CLICKHOUSE_CLIENT} -nq "
|
||||
SYSTEM FLUSH LOGS;
|
||||
|
||||
SELECT count()
|
||||
FROM system.opentelemetry_span_log
|
||||
WHERE finish_date >= yesterday()
|
||||
AND lower(hex(trace_id)) = '${1}'
|
||||
AND kind = '${2}'
|
||||
;"
|
||||
}
|
||||
|
||||
|
||||
#
|
||||
# Prepare tables for tests
|
||||
@ -57,30 +73,46 @@ CREATE TABLE ${CLICKHOUSE_DATABASE}.local_opentelemetry (key UInt64) Engine=Merg
|
||||
#
|
||||
# test1
|
||||
#
|
||||
echo "===1==="
|
||||
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
|
||||
insert $trace_id 0 1 "async-insert-writeToLocal"
|
||||
check_span $trace_id
|
||||
# 1 HTTP SERVER spans
|
||||
check_span_kind $trace_id 'SERVER'
|
||||
|
||||
#
|
||||
# test2
|
||||
#
|
||||
echo "===2==="
|
||||
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
|
||||
insert $trace_id 0 0 "async-insert-writeToRemote"
|
||||
check_span $trace_id
|
||||
# 3 SERVER spans, 1 for HTTP, 2 for TCP
|
||||
check_span_kind $trace_id 'SERVER'
|
||||
# 2 CLIENT spans
|
||||
check_span_kind $trace_id 'CLIENT'
|
||||
|
||||
#
|
||||
# test3
|
||||
#
|
||||
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
|
||||
insert $trace_id 1 1 "sync-insert-writeToLocal"
|
||||
echo "===3==="
|
||||
check_span $trace_id
|
||||
# 1 HTTP SERVER spans
|
||||
check_span_kind $trace_id 'SERVER'
|
||||
|
||||
#
|
||||
# test4
|
||||
#
|
||||
echo "===4==="
|
||||
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
|
||||
insert $trace_id 1 0 "sync-insert-writeToRemote"
|
||||
check_span $trace_id
|
||||
# 3 SERVER spans, 1 for HTTP, 2 for TCP
|
||||
check_span_kind $trace_id 'SERVER'
|
||||
# 2 CLIENT spans
|
||||
check_span_kind $trace_id 'CLIENT'
|
||||
|
||||
#
|
||||
# Cleanup
|
||||
|
@ -144,8 +144,8 @@ else
|
||||
# For Replicated database it will fail on initiator before enqueueing distributed DDL
|
||||
expected=0
|
||||
fi
|
||||
check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
|
||||
check_span $expected $trace_id "%DDLWorker::processTask%"
|
||||
check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}' AND kind = 'PRODUCER'"
|
||||
check_span $expected $trace_id "%DDLWorker::processTask%" "kind = 'CONSUMER'"
|
||||
|
||||
if [ $cluster_name = "test_shard_localhost" ]; then
|
||||
# There should be two 'query' spans, one is for the HTTPHandler, the other is for the DDL executing in DDLWorker.
|
||||
|
Loading…
Reference in New Issue
Block a user