From 2655de30ca57214a74af70356abb0d66bb103f42 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 19 Nov 2024 16:08:01 +0000 Subject: [PATCH] Backport #72076 to 24.10: Minor improvement for system.query_metric_log stateless test --- src/Interpreters/QueryMetricLog.cpp | 20 ++++------- .../03203_system_query_metric_log.sh | 36 +++++++++++-------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/src/Interpreters/QueryMetricLog.cpp b/src/Interpreters/QueryMetricLog.cpp index 52d773b7d1b..34f04b355ab 100644 --- a/src/Interpreters/QueryMetricLog.cpp +++ b/src/Interpreters/QueryMetricLog.cpp @@ -102,8 +102,7 @@ void QueryMetricLog::collectMetric(const ProcessList & process_list, String quer const auto query_info = process_list.getQueryInfo(query_id, false, true, false); if (!query_info) { - /// TODO: remove trace before 24.11 release after checking everything is fine on the CI - LOG_TRACE(logger, "Query {} is not running anymore, so we couldn't get its QueryStatusInfo", query_id); + LOG_TEST(logger, "Query {} is not running anymore, so we couldn't get its QueryStatusInfo", query_id); return; } @@ -114,8 +113,7 @@ void QueryMetricLog::collectMetric(const ProcessList & process_list, String quer if (it == queries.end()) { global_lock.unlock(); - /// TODO: remove trace before 24.11 release after checking everything is fine on the CI - LOG_TRACE(logger, "Query {} not found in the list. Finished while this collecting task was running", query_id); + LOG_TEST(logger, "Query {} not found in the list. Finished while this collecting task was running", query_id); return; } @@ -123,8 +121,7 @@ void QueryMetricLog::collectMetric(const ProcessList & process_list, String quer if (!query_status.mutex) { global_lock.unlock(); - /// TODO: remove trace before 24.11 release after checking everything is fine on the CI - LOG_TRACE(logger, "Query {} finished while this collecting task was running", query_id); + LOG_TEST(logger, "Query {} finished while this collecting task was running", query_id); return; } @@ -221,7 +218,7 @@ void QueryMetricLogStatus::scheduleNext(String query_id) } else { - LOG_TRACE(logger, "The next collecting task for query {} should have already run at {}. Scheduling it right now", + LOG_TEST(logger, "The next collecting task for query {} should have already run at {}. Scheduling it right now", query_id, timePointToString(info.next_collect_time)); info.task->schedule(); } @@ -229,15 +226,13 @@ void QueryMetricLogStatus::scheduleNext(String query_id) std::optional QueryMetricLogStatus::createLogMetricElement(const String & query_id, const QueryStatusInfo & query_info, TimePoint query_info_time, bool schedule_next) { - /// TODO: remove trace before 24.11 release after checking everything is fine on the CI - LOG_TRACE(logger, "Collecting query_metric_log for query {} and interval {} ms with QueryStatusInfo from {}. Next collection time: {}", + LOG_TEST(logger, "Collecting query_metric_log for query {} and interval {} ms with QueryStatusInfo from {}. Next collection time: {}", query_id, info.interval_milliseconds, timePointToString(query_info_time), schedule_next ? timePointToString(info.next_collect_time + std::chrono::milliseconds(info.interval_milliseconds)) : "finished"); if (query_info_time <= info.last_collect_time) { - /// TODO: remove trace before 24.11 release after checking everything is fine on the CI - LOG_TRACE(logger, "Query {} has a more recent metrics collected. Skipping this one", query_id); + LOG_TEST(logger, "Query {} has a more recent metrics collected. Skipping this one", query_id); return {}; } @@ -271,8 +266,7 @@ std::optional QueryMetricLogStatus::createLogMetricElemen } else { - /// TODO: remove trace before 24.11 release after checking everything is fine on the CI - LOG_DEBUG(logger, "Query {} has no profile counters", query_id); + LOG_TEST(logger, "Query {} has no profile counters", query_id); elem.profile_events = std::vector(ProfileEvents::end()); } diff --git a/tests/queries/0_stateless/03203_system_query_metric_log.sh b/tests/queries/0_stateless/03203_system_query_metric_log.sh index abcd14c8e5d..d8ae078fd5e 100755 --- a/tests/queries/0_stateless/03203_system_query_metric_log.sh +++ b/tests/queries/0_stateless/03203_system_query_metric_log.sh @@ -32,23 +32,31 @@ function check_log() # We calculate the diff of each row with its previous row to check whether the intervals at # which data is collected is right. The first row is always skipped because the diff with the # preceding one (itself) is 0. The last row is also skipped, because it doesn't contain a full - # interval. + # interval. We leave at least 20% of margin for many rows and at most 40% for one single row. $CLICKHOUSE_CLIENT --max_threads=1 -m -q """ SELECT '--Interval $interval: check that the delta/diff between the events is correct'; - WITH diff AS ( - SELECT - row_number() OVER () AS row, - count() OVER () as total_rows, - event_time_microseconds, - first_value(event_time_microseconds) OVER (ORDER BY event_time_microseconds ROWS BETWEEN 1 PRECEDING AND 0 FOLLOWING) as prev, - dateDiff('ms', prev, event_time_microseconds) AS diff - FROM system.query_metric_log - WHERE event_date >= yesterday() AND query_id = '${query_prefix}_${interval}' - ORDER BY event_time_microseconds - OFFSET 1 + WITH + (SELECT count() - 2 FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_${interval}') as diff_rows, + (20 + 20 / diff_rows)/100 AS margin + SELECT + avg(diff) BETWEEN (1 - margin) * $interval AND (1 + margin) * $interval + FROM ( + WITH diff AS ( + SELECT + row_number() OVER () AS row, + count() OVER () as total_rows, + event_time_microseconds, + first_value(event_time_microseconds) OVER (ORDER BY event_time_microseconds ROWS BETWEEN 1 PRECEDING AND 0 FOLLOWING) as prev, + dateDiff('ms', prev, event_time_microseconds) AS diff + FROM system.query_metric_log + WHERE event_date >= yesterday() AND query_id = '${query_prefix}_${interval}' + ORDER BY event_time_microseconds + OFFSET 1 + ) + SELECT avg(diff) AS diff + FROM diff + WHERE row < total_rows ) - SELECT avg(diff) BETWEEN $interval * 0.8 AND $interval * 1.2 - FROM diff WHERE row < total_rows """ # Check that the first event contains information from the beginning of the query.