Add a new setting query_metric_log_debug to avoid the noise

This commit is contained in:
Pablo Marcos 2024-11-08 12:28:06 +00:00
parent 0dbc041d8b
commit 955f537bd5
6 changed files with 26 additions and 14 deletions

View File

@ -2784,6 +2784,11 @@ If set to any negative value, it will take the value `collect_interval_milliseco
To disable the collection of a single query, set `query_metric_log_interval` to 0.
Default value: -1
)", 0) \
DECLARE(Bool, query_metric_log_debug, false, R"(
Turns on debugging traces for system.query_metric_log
Default value: false
)", 0) \
DECLARE(LogsLevel, send_logs_level, LogsLevel::fatal, R"(
Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'

View File

@ -75,6 +75,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"backup_restore_finish_timeout_after_error_sec", 0, 180, "New setting."},
{"query_plan_join_inner_table_selection", "auto", "auto", "New setting."},
{"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"},
{"query_metric_log_debug", false, false, "New setting."},
}
},
{"24.10",

View File

@ -144,11 +144,12 @@ void QueryMetricLog::collectMetric(const ProcessList & process_list, String quer
/// lock-order-inversion (potential deadlock)
/// which is not a real issue since QueryMetricLogStatus's mutex cannot be locked by anything else
/// until we add it to the queries map.
void QueryMetricLog::startQuery(const String & query_id, TimePoint start_time, UInt64 interval_milliseconds) TSA_NO_THREAD_SAFETY_ANALYSIS
void QueryMetricLog::startQuery(const String & query_id, TimePoint start_time, UInt64 interval_milliseconds, bool debug_traces) TSA_NO_THREAD_SAFETY_ANALYSIS
{
QueryMetricLogStatus query_status;
query_status.interval_milliseconds = interval_milliseconds;
query_status.next_collect_time = start_time;
query_status.debug_traces = debug_traces;
auto context = getContext();
const auto & process_list = context->getProcessList();
@ -222,21 +223,24 @@ void QueryMetricLogStatus::scheduleNext(String query_id)
}
else
{
LOG_TRACE(logger, "The next collecting task for query {} should have already run at {}. Scheduling it right now",
query_id, timePointToString(next_collect_time));
if (debug_traces)
LOG_DEBUG(logger, "The next collecting task for query {} should have already run at {}. Scheduling it right now",
query_id, timePointToString(next_collect_time));
task->schedule();
}
}
std::optional<QueryMetricLogElement> QueryMetricLogStatus::createLogMetricElement(const String & query_id, const QueryStatusInfo & query_info, TimePoint query_info_time, bool schedule_next)
{
LOG_TRACE(logger, "Collecting query_metric_log for query {} and interval {} ms with QueryStatusInfo from {}. Next collection time: {}",
query_id, interval_milliseconds, timePointToString(query_info_time),
schedule_next ? timePointToString(next_collect_time + std::chrono::milliseconds(interval_milliseconds)) : "finished");
if (debug_traces)
LOG_DEBUG(logger, "Collecting query_metric_log for query {} and interval {} ms with QueryStatusInfo from {}. Next collection time: {}",
query_id, interval_milliseconds, timePointToString(query_info_time),
schedule_next ? timePointToString(next_collect_time + std::chrono::milliseconds(interval_milliseconds)) : "finished");
if (query_info_time <= last_collect_time)
{
LOG_TRACE(logger, "Query {} has a more recent metrics collected. Skipping this one", query_id);
if (debug_traces)
LOG_DEBUG(logger, "Query {} has a more recent metrics collected. Skipping this one", query_id);
return {};
}

View File

@ -51,6 +51,7 @@ struct QueryMetricLogStatus
std::chrono::system_clock::time_point next_collect_time TSA_GUARDED_BY(getMutex());
std::vector<ProfileEvents::Count> last_profile_events TSA_GUARDED_BY(getMutex()) = std::vector<ProfileEvents::Count>(ProfileEvents::end());
BackgroundSchedulePool::TaskHolder task TSA_GUARDED_BY(getMutex());
bool debug_traces = false;
/// We need to be able to move it for the hash map, so we need to add an indirection here.
std::unique_ptr<Mutex> mutex = std::make_unique<Mutex>();
@ -78,7 +79,7 @@ public:
void shutdown() final;
/// Both startQuery and finishQuery are called from the thread that executes the query.
void startQuery(const String & query_id, TimePoint start_time, UInt64 interval_milliseconds);
void startQuery(const String & query_id, TimePoint start_time, UInt64 interval_milliseconds, bool debug_traces = false);
void finishQuery(const String & query_id, TimePoint finish_time, QueryStatusInfoPtr query_info = nullptr);
private:

View File

@ -146,6 +146,7 @@ namespace Setting
extern const SettingsQueryCacheSystemTableHandling query_cache_system_table_handling;
extern const SettingsSeconds query_cache_ttl;
extern const SettingsInt64 query_metric_log_interval;
extern const SettingsBool query_metric_log_debug;
extern const SettingsOverflowMode read_overflow_mode;
extern const SettingsOverflowMode read_overflow_mode_leaf;
extern const SettingsOverflowMode result_overflow_mode;
@ -455,7 +456,7 @@ QueryLogElement logQueryStart(
{
auto interval_milliseconds = getQueryMetricLogInterval(context);
if (interval_milliseconds > 0)
query_metric_log->startQuery(elem.client_info.current_query_id, query_start_time, interval_milliseconds);
query_metric_log->startQuery(elem.client_info.current_query_id, query_start_time, interval_milliseconds, settings[Setting::query_metric_log_debug]);
}
return elem;

View File

@ -6,11 +6,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
readonly query_prefix=$CLICKHOUSE_DATABASE
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_1000" -q "SELECT sleep(2.5) FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_400" -q "SELECT sleep(2.5) SETTINGS query_metric_log_interval=400 FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_123" -q "SELECT sleep(2.5) SETTINGS query_metric_log_interval=123 FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_0" -q "SELECT sleep(2.5) SETTINGS query_metric_log_interval=0 FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_fast" -q "SELECT sleep(0.1) FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_1000" -q "SELECT sleep(2.5) SETTINGS query_metric_log_debug=true FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_400" -q "SELECT sleep(2.5) SETTINGS query_metric_log_debug=true, query_metric_log_interval=400 FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_123" -q "SELECT sleep(2.5) SETTINGS query_metric_log_debug=true, query_metric_log_interval=123 FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_0" -q "SELECT sleep(2.5) SETTINGS query_metric_log_debug=true, query_metric_log_interval=0 FORMAT Null" &
$CLICKHOUSE_CLIENT --query-id="${query_prefix}_fast" -q "SELECT sleep(0.1) SETTINGS query_metric_log_debug=true FORMAT Null" &
wait