Merge pull request #72095 from ClickHouse/backport/24.10/72076

Backport #72076 to 24.10: Minor improvement for system.query_metric_log stateless test
This commit is contained in:
robot-ch-test-poll3 2024-11-19 19:05:28 +01:00 committed by GitHub
commit 86525cbf20
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 29 additions and 27 deletions

View File

@ -102,8 +102,7 @@ void QueryMetricLog::collectMetric(const ProcessList & process_list, String quer
const auto query_info = process_list.getQueryInfo(query_id, false, true, false);
if (!query_info)
{
/// TODO: remove trace before 24.11 release after checking everything is fine on the CI
LOG_TRACE(logger, "Query {} is not running anymore, so we couldn't get its QueryStatusInfo", query_id);
LOG_TEST(logger, "Query {} is not running anymore, so we couldn't get its QueryStatusInfo", query_id);
return;
}
@ -114,8 +113,7 @@ void QueryMetricLog::collectMetric(const ProcessList & process_list, String quer
if (it == queries.end())
{
global_lock.unlock();
/// TODO: remove trace before 24.11 release after checking everything is fine on the CI
LOG_TRACE(logger, "Query {} not found in the list. Finished while this collecting task was running", query_id);
LOG_TEST(logger, "Query {} not found in the list. Finished while this collecting task was running", query_id);
return;
}
@ -123,8 +121,7 @@ void QueryMetricLog::collectMetric(const ProcessList & process_list, String quer
if (!query_status.mutex)
{
global_lock.unlock();
/// TODO: remove trace before 24.11 release after checking everything is fine on the CI
LOG_TRACE(logger, "Query {} finished while this collecting task was running", query_id);
LOG_TEST(logger, "Query {} finished while this collecting task was running", query_id);
return;
}
@ -221,7 +218,7 @@ void QueryMetricLogStatus::scheduleNext(String query_id)
}
else
{
LOG_TRACE(logger, "The next collecting task for query {} should have already run at {}. Scheduling it right now",
LOG_TEST(logger, "The next collecting task for query {} should have already run at {}. Scheduling it right now",
query_id, timePointToString(info.next_collect_time));
info.task->schedule();
}
@ -229,15 +226,13 @@ void QueryMetricLogStatus::scheduleNext(String query_id)
std::optional<QueryMetricLogElement> QueryMetricLogStatus::createLogMetricElement(const String & query_id, const QueryStatusInfo & query_info, TimePoint query_info_time, bool schedule_next)
{
/// TODO: remove trace before 24.11 release after checking everything is fine on the CI
LOG_TRACE(logger, "Collecting query_metric_log for query {} and interval {} ms with QueryStatusInfo from {}. Next collection time: {}",
LOG_TEST(logger, "Collecting query_metric_log for query {} and interval {} ms with QueryStatusInfo from {}. Next collection time: {}",
query_id, info.interval_milliseconds, timePointToString(query_info_time),
schedule_next ? timePointToString(info.next_collect_time + std::chrono::milliseconds(info.interval_milliseconds)) : "finished");
if (query_info_time <= info.last_collect_time)
{
/// TODO: remove trace before 24.11 release after checking everything is fine on the CI
LOG_TRACE(logger, "Query {} has a more recent metrics collected. Skipping this one", query_id);
LOG_TEST(logger, "Query {} has a more recent metrics collected. Skipping this one", query_id);
return {};
}
@ -271,8 +266,7 @@ std::optional<QueryMetricLogElement> QueryMetricLogStatus::createLogMetricElemen
}
else
{
/// TODO: remove trace before 24.11 release after checking everything is fine on the CI
LOG_DEBUG(logger, "Query {} has no profile counters", query_id);
LOG_TEST(logger, "Query {} has no profile counters", query_id);
elem.profile_events = std::vector<ProfileEvents::Count>(ProfileEvents::end());
}

View File

@ -32,23 +32,31 @@ function check_log()
# We calculate the diff of each row with its previous row to check whether the intervals at
# which data is collected is right. The first row is always skipped because the diff with the
# preceding one (itself) is 0. The last row is also skipped, because it doesn't contain a full
# interval.
# interval. We leave at least 20% of margin for many rows and at most 40% for one single row.
$CLICKHOUSE_CLIENT --max_threads=1 -m -q """
SELECT '--Interval $interval: check that the delta/diff between the events is correct';
WITH diff AS (
SELECT
row_number() OVER () AS row,
count() OVER () as total_rows,
event_time_microseconds,
first_value(event_time_microseconds) OVER (ORDER BY event_time_microseconds ROWS BETWEEN 1 PRECEDING AND 0 FOLLOWING) as prev,
dateDiff('ms', prev, event_time_microseconds) AS diff
FROM system.query_metric_log
WHERE event_date >= yesterday() AND query_id = '${query_prefix}_${interval}'
ORDER BY event_time_microseconds
OFFSET 1
WITH
(SELECT count() - 2 FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_${interval}') as diff_rows,
(20 + 20 / diff_rows)/100 AS margin
SELECT
avg(diff) BETWEEN (1 - margin) * $interval AND (1 + margin) * $interval
FROM (
WITH diff AS (
SELECT
row_number() OVER () AS row,
count() OVER () as total_rows,
event_time_microseconds,
first_value(event_time_microseconds) OVER (ORDER BY event_time_microseconds ROWS BETWEEN 1 PRECEDING AND 0 FOLLOWING) as prev,
dateDiff('ms', prev, event_time_microseconds) AS diff
FROM system.query_metric_log
WHERE event_date >= yesterday() AND query_id = '${query_prefix}_${interval}'
ORDER BY event_time_microseconds
OFFSET 1
)
SELECT avg(diff) AS diff
FROM diff
WHERE row < total_rows
)
SELECT avg(diff) BETWEEN $interval * 0.8 AND $interval * 1.2
FROM diff WHERE row < total_rows
"""
# Check that the first event contains information from the beginning of the query.