mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
fix setting parent from HTTP traceparent
This commit is contained in:
parent
f3abb76e57
commit
84908df6d8
@ -2498,7 +2498,7 @@ public:
|
||||
{
|
||||
std::string traceparent = options["opentelemetry-traceparent"].as<std::string>();
|
||||
std::string error;
|
||||
if (!context.getClientInfo().setOpenTelemetryTraceparent(
|
||||
if (!context.getClientInfo().parseTraceparentHeader(
|
||||
traceparent, error))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
|
@ -70,7 +70,6 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision)
|
||||
// are random and will probably require the full length anyway.
|
||||
writeBinary(opentelemetry_trace_id, out);
|
||||
writeBinary(opentelemetry_span_id, out);
|
||||
writeBinary(opentelemetry_parent_span_id, out);
|
||||
writeBinary(opentelemetry_tracestate, out);
|
||||
writeBinary(opentelemetry_trace_flags, out);
|
||||
}
|
||||
@ -144,7 +143,6 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision)
|
||||
{
|
||||
readBinary(opentelemetry_trace_id, in);
|
||||
readBinary(opentelemetry_span_id, in);
|
||||
readBinary(opentelemetry_parent_span_id, in);
|
||||
readBinary(opentelemetry_tracestate, in);
|
||||
readBinary(opentelemetry_trace_flags, in);
|
||||
|
||||
@ -163,7 +161,7 @@ void ClientInfo::setInitialQuery()
|
||||
client_name = (DBMS_NAME " ") + client_name;
|
||||
}
|
||||
|
||||
bool ClientInfo::setOpenTelemetryTraceparent(const std::string & traceparent,
|
||||
bool ClientInfo::parseTraceparentHeader(const std::string & traceparent,
|
||||
std::string & error)
|
||||
{
|
||||
uint8_t version = -1;
|
||||
@ -215,13 +213,13 @@ bool ClientInfo::setOpenTelemetryTraceparent(const std::string & traceparent,
|
||||
|
||||
opentelemetry_trace_id = static_cast<__uint128_t>(trace_id_high) << 64
|
||||
| trace_id_low;
|
||||
opentelemetry_parent_span_id = trace_parent;
|
||||
opentelemetry_span_id = trace_parent;
|
||||
opentelemetry_trace_flags = trace_flags;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
std::string ClientInfo::getOpenTelemetryTraceparentForChild() const
|
||||
std::string ClientInfo::composeTraceparentHeader() const
|
||||
{
|
||||
// This span is a parent for its children, so we specify this span_id as a
|
||||
// parent id.
|
||||
|
@ -103,8 +103,15 @@ public:
|
||||
/// Initialize parameters on client initiating query.
|
||||
void setInitialQuery();
|
||||
|
||||
bool setOpenTelemetryTraceparent(const std::string & traceparent, std::string & error);
|
||||
std::string getOpenTelemetryTraceparentForChild() const;
|
||||
// Parse/compose OpenTelemetry traceparent header.
|
||||
// Note that these functions use span_id field, not parent_span_id, same as
|
||||
// in native protocol. The incoming traceparent corresponds to the upstream
|
||||
// trace span, and the outgoing traceparent corresponds to our current span.
|
||||
// We use the same ClientInfo structure first for incoming span, and then
|
||||
// for our span: when we switch, we use old span_id as parent_span_id, and
|
||||
// generate a new span_id (currently this happens in Context::setQueryId()).
|
||||
bool parseTraceparentHeader(const std::string & traceparent, std::string & error);
|
||||
std::string composeTraceparentHeader() const;
|
||||
|
||||
private:
|
||||
void fillOSUserHostNameAndVersionInfo();
|
||||
|
@ -310,10 +310,6 @@ void HTTPHandler::processQuery(
|
||||
session->release();
|
||||
});
|
||||
|
||||
// Set the query id supplied by the user, if any.
|
||||
context.setCurrentQueryId(params.get("query_id",
|
||||
request.get("X-ClickHouse-Query-Id", "")));
|
||||
|
||||
// Parse the OpenTelemetry traceparent header.
|
||||
// Disable in Arcadia -- it interferes with the
|
||||
// test_clickhouse.TestTracing.test_tracing_via_http_proxy[traceparent] test.
|
||||
@ -322,7 +318,7 @@ void HTTPHandler::processQuery(
|
||||
{
|
||||
std::string opentelemetry_traceparent = request.get("traceparent");
|
||||
std::string error;
|
||||
if (!context.getClientInfo().setOpenTelemetryTraceparent(
|
||||
if (!context.getClientInfo().parseTraceparentHeader(
|
||||
opentelemetry_traceparent, error))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER,
|
||||
@ -334,6 +330,11 @@ void HTTPHandler::processQuery(
|
||||
}
|
||||
#endif
|
||||
|
||||
// Set the query id supplied by the user, if any, and also update the
|
||||
// OpenTelemetry fields.
|
||||
context.setCurrentQueryId(params.get("query_id",
|
||||
request.get("X-ClickHouse-Query-Id", "")));
|
||||
|
||||
/// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
|
||||
String http_response_compression_methods = request.get("Accept-Encoding", "");
|
||||
CompressionMethod http_response_compression_method = CompressionMethod::None;
|
||||
|
@ -74,7 +74,7 @@ namespace
|
||||
if (client_info.opentelemetry_trace_id)
|
||||
{
|
||||
header.emplace_back("traceparent",
|
||||
client_info.getOpenTelemetryTraceparentForChild());
|
||||
client_info.composeTraceparentHeader());
|
||||
|
||||
if (!client_info.opentelemetry_tracestate.empty())
|
||||
{
|
||||
|
@ -1,10 +1,10 @@
|
||||
===http===
|
||||
1
|
||||
4
|
||||
1
|
||||
{"total spans":"4","unique spans":"4","unique non-zero parent spans":"2"}
|
||||
{"initial query spans with proper parent":"1"}
|
||||
{"unique non-empty tracestate values":"1"}
|
||||
===native===
|
||||
1
|
||||
2
|
||||
1
|
||||
{"total spans":"2","unique spans":"2","unique non-zero parent spans":"2"}
|
||||
{"initial query spans with proper parent":"1"}
|
||||
{"unique non-empty tracestate values":"1"}
|
||||
===sampled===
|
||||
OK
|
||||
|
@ -6,19 +6,39 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
|
||||
function check_log
|
||||
{
|
||||
${CLICKHOUSE_CLIENT} -nq "
|
||||
${CLICKHOUSE_CLIENT} --format=JSONEachRow -nq "
|
||||
system flush logs;
|
||||
|
||||
-- Check the number of spans with given trace id, to verify it was propagated.
|
||||
select count(*)
|
||||
-- Check the number of query spans with given trace id, to verify it was
|
||||
-- propagated.
|
||||
select count(*) "'"'"total spans"'"'",
|
||||
uniqExact(span_id) "'"'"unique spans"'"'",
|
||||
uniqExactIf(parent_span_id, parent_span_id != 0)
|
||||
"'"'"unique non-zero parent spans"'"'"
|
||||
from system.opentelemetry_log
|
||||
where trace_id = reinterpretAsUUID(reverse(unhex('$trace_id')))
|
||||
and operation_name = 'query'
|
||||
;
|
||||
|
||||
-- Also check that the initial query span in ClickHouse has proper parent span.
|
||||
select count(*) "'"'"initial query spans with proper parent"'"'"
|
||||
from
|
||||
(select *, attribute_name, attribute_value
|
||||
from system.opentelemetry_log
|
||||
array join attribute.names as attribute_name,
|
||||
attribute.values as attribute_value) o
|
||||
join system.query_log on query_id = o.attribute_value
|
||||
where trace_id = reinterpretAsUUID(reverse(unhex('$trace_id')))
|
||||
and operation_name = 'query'
|
||||
and parent_span_id = reinterpretAsUInt64(unhex('73'))
|
||||
and o.attribute_name = 'clickhouse.query_id'
|
||||
and is_initial_query
|
||||
and type = 'QueryFinish'
|
||||
;
|
||||
|
||||
-- Check that the tracestate header was propagated. It must have exactly the
|
||||
-- same non-empty value for all 'query' spans in this trace.
|
||||
select count(distinct value)
|
||||
select uniqExact(value) "'"'"unique non-empty tracestate values"'"'"
|
||||
from system.opentelemetry_log
|
||||
array join attribute.names as name, attribute.values as value
|
||||
where
|
||||
@ -34,14 +54,15 @@ select count(distinct value)
|
||||
echo "===http==="
|
||||
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))")
|
||||
|
||||
# Check that the HTTP traceparent is read, and then passed through `remote` table function.
|
||||
# We expect 4 queries, because there are two DESC TABLE queries for the shard.
|
||||
# This is bug-ish, see https://github.com/ClickHouse/ClickHouse/issues/14228
|
||||
# Check that the HTTP traceparent is read, and then passed through `remote`
|
||||
# table function. We expect 4 queries -- one initial, one SELECT and two
|
||||
# DESC TABLE. Two DESC TABLE instead of one looks like a bug, see the issue:
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/14228
|
||||
${CLICKHOUSE_CURL} \
|
||||
--header "traceparent: 00-$trace_id-0000000000000010-01" \
|
||||
--header "traceparent: 00-$trace_id-0000000000000073-01" \
|
||||
--header "tracestate: some custom state" "http://localhost:8123/" \
|
||||
--get \
|
||||
--data-urlencode "query=select 1 from remote('127.0.0.2', system, one)"
|
||||
--data-urlencode "query=select 1 from remote('127.0.0.2', system, one) format Null"
|
||||
|
||||
check_log
|
||||
|
||||
@ -52,14 +73,14 @@ echo "===native==="
|
||||
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))")
|
||||
|
||||
${CLICKHOUSE_CLIENT} \
|
||||
--opentelemetry-traceparent "00-$trace_id-0000000000000020-02" \
|
||||
--opentelemetry-traceparent "00-$trace_id-0000000000000073-01" \
|
||||
--opentelemetry-tracestate "another custom state" \
|
||||
--query "select * from url('http://127.0.0.2:8123/?query=select%201', CSV, 'a int')"
|
||||
--query "select * from url('http://127.0.0.2:8123/?query=select%201%20format%20Null', CSV, 'a int')"
|
||||
|
||||
check_log
|
||||
|
||||
# Test sampled tracing. The traces should be started with the specified probability,
|
||||
# only for initial queries.
|
||||
# Test sampled tracing. The traces should be started with the specified
|
||||
# probability, only for initial queries.
|
||||
echo "===sampled==="
|
||||
query_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))")
|
||||
|
||||
@ -71,8 +92,8 @@ do
|
||||
--query "select 1 from remote('127.0.0.2', system, one) format Null" \
|
||||
&
|
||||
|
||||
# clickhouse-client is slow to start, so run them in parallel, but not too
|
||||
# much.
|
||||
# clickhouse-client is slow to start (initialization of DateLUT), so run
|
||||
# several clients in parallel, but not too many.
|
||||
if [[ $((i % 10)) -eq 0 ]]
|
||||
then
|
||||
wait
|
||||
@ -84,7 +105,7 @@ ${CLICKHOUSE_CLIENT} -q "system flush logs"
|
||||
${CLICKHOUSE_CLIENT} -q "
|
||||
with count(*) as c
|
||||
-- expect 200 * 0.1 = 20 sampled events on average
|
||||
select if(c > 10 and c < 30, 'OK', 'fail: ' || toString(c))
|
||||
select if(c > 5 and c < 35, 'OK', 'fail: ' || toString(c))
|
||||
from system.opentelemetry_log
|
||||
array join attribute.names as name, attribute.values as value
|
||||
where name = 'clickhouse.query_id'
|
||||
|
Loading…
Reference in New Issue
Block a user