2017-04-01 09:19:00 +00:00
# include <Interpreters/AsynchronousMetrics.h>
2020-06-10 19:17:30 +00:00
# include <Interpreters/AsynchronousMetricLog.h>
2021-06-13 12:38:57 +00:00
# include <Interpreters/JIT/CompiledExpressionCache.h>
2020-02-10 13:10:17 +00:00
# include <Interpreters/DatabaseCatalog.h>
2020-05-20 20:16:32 +00:00
# include <Interpreters/Context.h>
2017-04-01 09:19:00 +00:00
# include <Common/Exception.h>
# include <Common/setThreadName.h>
# include <Common/CurrentMetrics.h>
2017-07-13 20:58:19 +00:00
# include <Common/typeid_cast.h>
2021-07-04 21:22:58 +00:00
# include <Common/filesystemHelpers.h>
2020-12-17 13:47:03 +00:00
# include <Server/ProtocolServerAdapter.h>
2017-04-01 09:19:00 +00:00
# include <Storages/MarkCache.h>
# include <Storages/StorageMergeTree.h>
# include <Storages/StorageReplicatedMergeTree.h>
# include <IO/UncompressedCache.h>
2021-03-28 19:24:28 +00:00
# include <IO/MMappedFileCache.h>
2021-07-04 03:03:49 +00:00
# include <IO/ReadHelpers.h>
2017-04-01 09:19:00 +00:00
# include <Databases/IDatabase.h>
2016-10-23 06:12:50 +00:00
# include <chrono>
2020-04-17 04:09:41 +00:00
2020-04-16 12:31:57 +00:00
# if !defined(ARCADIA_BUILD)
# include "config_core.h"
2018-06-19 18:09:09 +00:00
# endif
2017-01-20 17:58:07 +00:00
2018-08-02 00:20:20 +00:00
# if USE_JEMALLOC
2020-04-16 12:31:57 +00:00
# include <jemalloc / jemalloc.h>
2018-08-02 00:20:20 +00:00
# endif
2016-10-23 06:12:50 +00:00
2020-04-25 12:36:01 +00:00
/// Forward declaration of a metric that is defined outside this file.
/// AsynchronousMetrics::update() assigns it via CurrentMetrics::set().
namespace CurrentMetrics
{
extern const Metric MemoryTracking ;
}
2016-10-23 06:12:50 +00:00
namespace DB
{
2021-07-04 20:49:36 +00:00
/// Error codes referenced by this file; only declared here, the definitions live elsewhere.
namespace ErrorCodes
2021-07-04 03:03:49 +00:00
{
2021-07-04 20:49:36 +00:00
extern const int CORRUPTED_DATA ;
extern const int CANNOT_SYSCONF ;
}
2021-07-05 22:12:49 +00:00
# if defined(OS_LINUX)
2021-07-04 20:49:36 +00:00
/// Buffer size handed to every ReadBufferFromFilePRead that this file creates for procfs/sysfs entries.
static constexpr size_t small_buffer_size = 4096 ;
2021-07-04 03:03:49 +00:00
2021-07-12 01:12:34 +00:00
static void openFileIfExists ( const char * filename , std : : optional < ReadBufferFromFilePRead > & out )
2021-07-04 20:49:36 +00:00
{
2021-07-04 03:03:49 +00:00
/// Ignoring time of check is not time of use cases, as procfs/sysfs files are fairly persistent.
std : : error_code ec ;
if ( std : : filesystem : : is_regular_file ( filename , ec ) )
out . emplace ( filename , small_buffer_size ) ;
}
2021-07-12 01:12:34 +00:00
static std : : unique_ptr < ReadBufferFromFilePRead > openFileIfExists ( const std : : string & filename )
2021-07-04 20:49:36 +00:00
{
std : : error_code ec ;
if ( std : : filesystem : : is_regular_file ( filename , ec ) )
2021-07-12 01:12:34 +00:00
return std : : make_unique < ReadBufferFromFilePRead > ( filename , small_buffer_size ) ;
2021-07-04 20:49:36 +00:00
return { } ;
}
2021-07-05 22:12:49 +00:00
# endif
2021-07-04 20:49:36 +00:00
2021-07-04 03:03:49 +00:00
/// Stores the context, the update period and the two server lists, creates the logger and,
/// on Linux, opens the procfs files and scans the sysfs trees that are read later.
/// Files that are absent are simply left unopened (see openFileIfExists).
AsynchronousMetrics::AsynchronousMetrics(
    ContextPtr global_context_,
    int update_period_seconds,
    std::shared_ptr<std::vector<ProtocolServerAdapter>> servers_to_start_before_tables_,
    std::shared_ptr<std::vector<ProtocolServerAdapter>> servers_)
    : WithContext(global_context_)
    , update_period(update_period_seconds)
    /// The shared_ptr parameters are taken by value, so move them into the members
    /// instead of paying for another reference-count increment.
    , servers_to_start_before_tables(std::move(servers_to_start_before_tables_))
    , servers(std::move(servers_))
    , log(&Poco::Logger::get("AsynchronousMetrics"))
{
#if defined(OS_LINUX)
    /// Paths must be exact: a padded path never names an existing file.
    openFileIfExists("/proc/meminfo", meminfo);
    openFileIfExists("/proc/loadavg", loadavg);
    openFileIfExists("/proc/stat", proc_stat);
    openFileIfExists("/proc/cpuinfo", cpuinfo);
    openFileIfExists("/proc/sys/fs/file-nr", file_nr);
    openFileIfExists("/proc/uptime", uptime);
    openFileIfExists("/proc/net/dev", net_dev);

    openSensors();
    openBlockDevices();
    openEDAC();
    openSensorsChips();
#endif
}
# if defined(OS_LINUX)
void AsynchronousMetrics : : openSensors ( )
{
LOG_TRACE ( log , " Scanning /sys/class/thermal " ) ;
thermal . clear ( ) ;
2021-07-05 00:51:22 +00:00
for ( size_t thermal_device_index = 0 ; ; + + thermal_device_index )
2021-07-04 20:49:36 +00:00
{
2021-07-12 01:12:34 +00:00
std : : unique_ptr < ReadBufferFromFilePRead > file = openFileIfExists ( fmt : : format ( " /sys/class/thermal/thermal_zone{}/temp " , thermal_device_index ) ) ;
2021-07-04 20:49:36 +00:00
if ( ! file )
2021-07-05 00:51:22 +00:00
{
/// Sometimes indices are from zero sometimes from one.
if ( thermal_device_index = = 0 )
continue ;
else
break ;
}
2021-07-04 20:49:36 +00:00
thermal . emplace_back ( std : : move ( file ) ) ;
}
2021-08-04 19:25:53 +00:00
}
void AsynchronousMetrics : : openBlockDevices ( )
{
LOG_TRACE ( log , " Scanning /sys/block " ) ;
if ( ! std : : filesystem : : exists ( " /sys/block " ) )
return ;
block_devices_rescan_delay . restart ( ) ;
block_devs . clear ( ) ;
for ( const auto & device_dir : std : : filesystem : : directory_iterator ( " /sys/block " ) )
2021-07-05 02:24:36 +00:00
{
2021-08-04 19:25:53 +00:00
String device_name = device_dir . path ( ) . filename ( ) ;
2021-07-05 02:24:36 +00:00
2021-08-04 19:25:53 +00:00
/// We are not interested in loopback devices.
if ( device_name . starts_with ( " loop " ) )
continue ;
2021-07-05 02:24:36 +00:00
2021-08-04 19:25:53 +00:00
std : : unique_ptr < ReadBufferFromFilePRead > file = openFileIfExists ( device_dir . path ( ) / " stat " ) ;
if ( ! file )
continue ;
2021-07-05 02:24:36 +00:00
2021-08-04 19:25:53 +00:00
block_devs [ device_name ] = std : : move ( file ) ;
2021-07-05 02:24:36 +00:00
}
2021-07-04 03:03:49 +00:00
}
2021-08-11 06:54:55 +00:00
2021-08-11 07:02:34 +00:00
void AsynchronousMetrics : : openEDAC ( )
{
LOG_TRACE ( log , " Scanning /sys/devices/system/edac " ) ;
edac . clear ( ) ;
for ( size_t edac_index = 0 ; ; + + edac_index )
{
String edac_correctable_file = fmt : : format ( " /sys/devices/system/edac/mc/mc{}/ce_count " , edac_index ) ;
String edac_uncorrectable_file = fmt : : format ( " /sys/devices/system/edac/mc/mc{}/ue_count " , edac_index ) ;
bool edac_correctable_file_exists = std : : filesystem : : exists ( edac_correctable_file ) ;
bool edac_uncorrectable_file_exists = std : : filesystem : : exists ( edac_uncorrectable_file ) ;
if ( ! edac_correctable_file_exists & & ! edac_uncorrectable_file_exists )
{
if ( edac_index = = 0 )
continue ;
else
break ;
}
edac . emplace_back ( ) ;
if ( edac_correctable_file_exists )
edac . back ( ) . first = openFileIfExists ( edac_correctable_file ) ;
if ( edac_uncorrectable_file_exists )
edac . back ( ) . second = openFileIfExists ( edac_uncorrectable_file ) ;
}
}
2021-08-11 06:54:55 +00:00
void AsynchronousMetrics : : openSensorsChips ( )
{
LOG_TRACE ( log , " Scanning /sys/class/hwmon " ) ;
hwmon_devices . clear ( ) ;
for ( size_t hwmon_index = 0 ; ; + + hwmon_index )
{
String hwmon_name_file = fmt : : format ( " /sys/class/hwmon/hwmon{}/name " , hwmon_index ) ;
if ( ! std : : filesystem : : exists ( hwmon_name_file ) )
{
if ( hwmon_index = = 0 )
continue ;
else
break ;
}
String hwmon_name ;
ReadBufferFromFilePRead hwmon_name_in ( hwmon_name_file , small_buffer_size ) ;
readText ( hwmon_name , hwmon_name_in ) ;
std : : replace ( hwmon_name . begin ( ) , hwmon_name . end ( ) , ' ' , ' _ ' ) ;
for ( size_t sensor_index = 0 ; ; + + sensor_index )
{
String sensor_name_file = fmt : : format ( " /sys/class/hwmon/hwmon{}/temp{}_label " , hwmon_index , sensor_index ) ;
String sensor_value_file = fmt : : format ( " /sys/class/hwmon/hwmon{}/temp{}_input " , hwmon_index , sensor_index ) ;
bool sensor_name_file_exists = std : : filesystem : : exists ( sensor_name_file ) ;
bool sensor_value_file_exists = std : : filesystem : : exists ( sensor_value_file ) ;
/// Sometimes there are labels but there is no files with data or vice versa.
if ( ! sensor_name_file_exists & & ! sensor_value_file_exists )
{
if ( sensor_index = = 0 )
continue ;
else
break ;
}
std : : unique_ptr < ReadBufferFromFilePRead > file = openFileIfExists ( sensor_value_file ) ;
if ( ! file )
continue ;
String sensor_name ;
if ( sensor_name_file_exists )
{
ReadBufferFromFilePRead sensor_name_in ( sensor_name_file , small_buffer_size ) ;
readText ( sensor_name , sensor_name_in ) ;
std : : replace ( sensor_name . begin ( ) , sensor_name . end ( ) , ' ' , ' _ ' ) ;
}
hwmon_devices [ hwmon_name ] [ sensor_name ] = std : : move ( file ) ;
}
}
}
2021-08-04 19:25:53 +00:00
# endif
2021-07-04 03:03:49 +00:00
void AsynchronousMetrics : : start ( )
{
/// Update once right now, to make metrics available just after server start
/// (without waiting for asynchronous_metrics_update_period_s).
2021-07-04 22:33:32 +00:00
update ( std : : chrono : : system_clock : : now ( ) ) ;
2021-07-04 03:03:49 +00:00
thread = std : : make_unique < ThreadFromGlobalPool > ( [ this ] { run ( ) ; } ) ;
}
2016-10-24 04:06:27 +00:00
/// Sets `quit` under the mutex, wakes the waiter on `wait_cond` and joins the background thread
/// if one was started. Any exception is logged instead of escaping the destructor.
AsynchronousMetrics::~AsynchronousMetrics()
{
    try
    {
        {
            /// The flag is flipped under the same mutex that run() holds while waiting.
            std::lock_guard guard{mutex};
            quit = true;
        }

        wait_cond.notify_one();

        /// `thread` is only set by start(), so it may still be empty here.
        if (thread)
            thread->join();
    }
    catch (...)
    {
        DB::tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
2020-06-10 19:17:30 +00:00
/// Returns a copy of `values`, taken while holding `mutex`.
AsynchronousMetricValues AsynchronousMetrics::getValues() const
{
    std::lock_guard guard{mutex};
    return values;
}
2020-06-26 00:16:58 +00:00
/// Returns the next wall-clock instant, in whole seconds, at which metrics should be collected.
/// Collection instants are `update_period` apart, counted from the start of the current hour
/// and shifted by half a period.
/// A non-positive `update_period` is treated as one second, so the divisions below are always defined.
static auto get_next_update_time(std::chrono::seconds update_period)
{
    using namespace std::chrono;

    // A zero period would make the integer division below undefined; fall back to the smallest
    // meaningful period instead. Positive periods are used unchanged.
    const seconds period = update_period > seconds::zero() ? update_period : seconds{1};

    const auto now = time_point_cast<seconds>(system_clock::now());

    // Use seconds since the start of the hour, because we don't know when
    // the epoch started, maybe on some weird fractional time.
    const auto start_of_hour = time_point_cast<seconds>(time_point_cast<hours>(now));
    const auto seconds_passed = now - start_of_hour;

    // Rotate time forward by half a period -- e.g. if a period is a minute,
    // we'll collect metrics on start of minute + 30 seconds. This is to
    // achieve temporal separation with MetricTransmitter. Don't forget to
    // rotate it back.
    const auto rotation = period / 2;

    const auto periods_passed = (seconds_passed + rotation) / period;
    const auto seconds_next = (periods_passed + 1) * period - rotation;
    const auto time_next = start_of_hour + seconds_next;

    return time_next;
}
2020-06-18 01:54:10 +00:00
2020-06-26 00:16:58 +00:00
void AsynchronousMetrics : : run ( )
{
setThreadName ( " AsyncMetrics " ) ;
2017-04-01 07:20:54 +00:00
while ( true )
{
2021-07-04 22:33:32 +00:00
auto next_update_time = get_next_update_time ( update_period ) ;
2020-06-18 01:54:10 +00:00
{
// Wait first, so that the first metric collection is also on even time.
std : : unique_lock lock { mutex } ;
2021-07-04 22:33:32 +00:00
if ( wait_cond . wait_until ( lock , next_update_time ,
2020-06-26 00:16:58 +00:00
[ this ] { return quit ; } ) )
{
2020-06-18 01:54:10 +00:00
break ;
2020-06-26 00:16:58 +00:00
}
2020-06-18 01:54:10 +00:00
}
2017-04-01 07:20:54 +00:00
try
{
2021-07-04 22:33:32 +00:00
update ( next_update_time ) ;
2017-04-01 07:20:54 +00:00
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
}
2016-10-23 06:12:50 +00:00
}
2016-10-23 06:38:53 +00:00
/// Raises `max` to `x` when `x`, converted to Max, is strictly greater than the current value.
template <typename Max, typename T>
static void calculateMax(Max & max, T x)
{
    const bool exceeds_current = Max(x) > max;
    if (exceeds_current)
        max = x;
}
2016-10-23 06:38:53 +00:00
/// Adds `x` to `sum` and raises `max` to `x` when `x`, converted to Max, is strictly greater.
template <typename Max, typename Sum, typename T>
static void calculateMaxAndSum(Max & max, Sum & sum, T x)
{
    sum += x;

    const bool exceeds_current = Max(x) > max;
    if (exceeds_current)
        max = x;
}
2020-06-18 01:54:10 +00:00
# if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
/// Calls mallctl("epoch") with `value` as both the old-value output and the new-value input,
/// and returns what mallctl stored in `value`.
/// The mallctl return code is not checked; `value` stays 0 if nothing is written.
uint64_t updateJemallocEpoch()
{
    uint64_t value = 0;
    size_t size = sizeof(value);
    /// The control name must be exact: a padded name is not a known mallctl entry.
    mallctl("epoch", &value, &size, &value, size);
    return value;
}
/// Reads the mallctl entry `jemalloc_full_name` as a `Value` and stores it in `values`
/// under `clickhouse_full_name`. The mallctl return code is not checked; a value-initialized
/// `Value` is stored if mallctl writes nothing.
template <typename Value>
static void saveJemallocMetricImpl(AsynchronousMetricValues & values,
    const std::string & jemalloc_full_name,
    const std::string & clickhouse_full_name)
{
    Value metric{};
    size_t metric_size = sizeof(metric);
    mallctl(jemalloc_full_name.c_str(), &metric, &metric_size, nullptr, 0);
    values[clickhouse_full_name] = metric;
}
/// Saves the mallctl entry "stats.<metric_name>" into `values` as "jemalloc.<metric_name>".
template <typename Value>
static void saveJemallocMetric(AsynchronousMetricValues & values,
    const std::string & metric_name)
{
    /// Both names must be exact: padding would break the mallctl lookup and the stored metric name.
    saveJemallocMetricImpl<Value>(values,
        fmt::format("stats.{}", metric_name),
        fmt::format("jemalloc.{}", metric_name));
}
/// Saves the mallctl entry "stats.arenas.<MALLCTL_ARENAS_ALL>.<metric_name>" into `values`
/// as "jemalloc.arenas.all.<metric_name>".
template <typename Value>
static void saveAllArenasMetric(AsynchronousMetricValues & values,
    const std::string & metric_name)
{
    /// Both names must be exact: padding would break the mallctl lookup and the stored metric name.
    saveJemallocMetricImpl<Value>(values,
        fmt::format("stats.arenas.{}.{}", MALLCTL_ARENAS_ALL, metric_name),
        fmt::format("jemalloc.arenas.all.{}", metric_name));
}
# endif
2016-10-23 06:12:50 +00:00
2021-07-04 20:49:36 +00:00
# if defined(OS_LINUX)
void AsynchronousMetrics : : ProcStatValuesCPU : : read ( ReadBuffer & in )
{
readText ( user , in ) ;
2021-07-04 21:54:46 +00:00
skipWhitespaceIfAny ( in , true ) ;
2021-07-04 20:49:36 +00:00
readText ( nice , in ) ;
2021-07-04 21:54:46 +00:00
skipWhitespaceIfAny ( in , true ) ;
2021-07-04 20:49:36 +00:00
readText ( system , in ) ;
2021-07-04 21:54:46 +00:00
skipWhitespaceIfAny ( in , true ) ;
2021-07-04 20:49:36 +00:00
readText ( idle , in ) ;
2021-07-04 21:54:46 +00:00
skipWhitespaceIfAny ( in , true ) ;
2021-07-04 20:49:36 +00:00
readText ( iowait , in ) ;
2021-07-04 21:54:46 +00:00
skipWhitespaceIfAny ( in , true ) ;
2021-07-04 20:49:36 +00:00
readText ( irq , in ) ;
2021-07-04 21:54:46 +00:00
skipWhitespaceIfAny ( in , true ) ;
2021-07-04 20:49:36 +00:00
readText ( softirq , in ) ;
2021-07-04 21:54:46 +00:00
/// Just in case for old Linux kernels, we check if these values present.
if ( ! checkChar ( ' \n ' , in ) )
{
skipWhitespaceIfAny ( in , true ) ;
readText ( steal , in ) ;
}
if ( ! checkChar ( ' \n ' , in ) )
{
skipWhitespaceIfAny ( in , true ) ;
readText ( guest , in ) ;
}
if ( ! checkChar ( ' \n ' , in ) )
{
skipWhitespaceIfAny ( in , true ) ;
readText ( guest_nice , in ) ;
}
2021-07-04 20:49:36 +00:00
skipToNextLineOrEOF ( in ) ;
}
/// Field-wise difference `*this - other` of the CPU counters.
/// Fields not assigned below keep their value-initialized state.
AsynchronousMetrics::ProcStatValuesCPU
AsynchronousMetrics::ProcStatValuesCPU::operator-(const AsynchronousMetrics::ProcStatValuesCPU & other) const
{
    ProcStatValuesCPU delta{};

    delta.user = user - other.user;
    delta.nice = nice - other.nice;
    delta.system = system - other.system;
    delta.idle = idle - other.idle;
    delta.iowait = iowait - other.iowait;
    delta.irq = irq - other.irq;
    delta.softirq = softirq - other.softirq;
    delta.steal = steal - other.steal;
    delta.guest = guest - other.guest;
    delta.guest_nice = guest_nice - other.guest_nice;

    return delta;
}
/// Field-wise difference `*this - other` of the interrupt, context-switch and process-creation counters.
AsynchronousMetrics::ProcStatValuesOther
AsynchronousMetrics::ProcStatValuesOther::operator-(const AsynchronousMetrics::ProcStatValuesOther & other) const
{
    ProcStatValuesOther delta{};

    delta.interrupts = interrupts - other.interrupts;
    delta.context_switches = context_switches - other.context_switches;
    delta.processes_created = processes_created - other.processes_created;

    return delta;
}
2021-07-05 02:24:36 +00:00
void AsynchronousMetrics : : BlockDeviceStatValues : : read ( ReadBuffer & in )
{
skipWhitespaceIfAny ( in , true ) ;
readText ( read_ios , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( read_merges , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( read_sectors , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( read_ticks , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( write_ios , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( write_merges , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( write_sectors , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( write_ticks , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( in_flight_ios , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( io_ticks , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( time_in_queue , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( discard_ops , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( discard_merges , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( discard_sectors , in ) ;
skipWhitespaceIfAny ( in , true ) ;
readText ( discard_ticks , in ) ;
}
/// Field-wise difference `*this - other` of the block device counters.
/// `in_flight_ios` is copied from `*this` rather than subtracted.
AsynchronousMetrics::BlockDeviceStatValues
AsynchronousMetrics::BlockDeviceStatValues::operator-(const AsynchronousMetrics::BlockDeviceStatValues & other) const
{
    BlockDeviceStatValues delta{};

    delta.read_ios = read_ios - other.read_ios;
    delta.read_merges = read_merges - other.read_merges;
    delta.read_sectors = read_sectors - other.read_sectors;
    delta.read_ticks = read_ticks - other.read_ticks;

    delta.write_ios = write_ios - other.write_ios;
    delta.write_merges = write_merges - other.write_merges;
    delta.write_sectors = write_sectors - other.write_sectors;
    delta.write_ticks = write_ticks - other.write_ticks;

    delta.in_flight_ios = in_flight_ios; /// This is current value, not total.
    delta.io_ticks = io_ticks - other.io_ticks;
    delta.time_in_queue = time_in_queue - other.time_in_queue;

    delta.discard_ops = discard_ops - other.discard_ops;
    delta.discard_merges = discard_merges - other.discard_merges;
    delta.discard_sectors = discard_sectors - other.discard_sectors;
    delta.discard_ticks = discard_ticks - other.discard_ticks;

    return delta;
}
2021-07-05 02:47:33 +00:00
/// Field-wise difference `*this - other` of the receive and send counters of a network interface.
AsynchronousMetrics::NetworkInterfaceStatValues
AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMetrics::NetworkInterfaceStatValues & other) const
{
    NetworkInterfaceStatValues delta{};

    delta.recv_bytes = recv_bytes - other.recv_bytes;
    delta.recv_packets = recv_packets - other.recv_packets;
    delta.recv_errors = recv_errors - other.recv_errors;
    delta.recv_drop = recv_drop - other.recv_drop;

    delta.send_bytes = send_bytes - other.send_bytes;
    delta.send_packets = send_packets - other.send_packets;
    delta.send_errors = send_errors - other.send_errors;
    delta.send_drop = send_drop - other.send_drop;

    return delta;
}
2021-07-04 20:49:36 +00:00
# endif
2021-07-04 22:33:32 +00:00
void AsynchronousMetrics : : update ( std : : chrono : : system_clock : : time_point update_time )
2016-10-23 06:12:50 +00:00
{
2021-07-04 22:33:32 +00:00
Stopwatch watch ;
2020-06-10 19:17:30 +00:00
AsynchronousMetricValues new_values ;
2021-07-04 22:33:32 +00:00
auto current_time = std : : chrono : : system_clock : : now ( ) ;
2021-07-05 19:29:36 +00:00
auto time_after_previous_update [[maybe_unused]] = current_time - previous_update_time ;
2021-07-04 22:33:32 +00:00
previous_update_time = update_time ;
/// This is also a good indicator of system responsiveness.
new_values [ " Jitter " ] = std : : chrono : : duration_cast < std : : chrono : : nanoseconds > ( current_time - update_time ) . count ( ) / 1e9 ;
2017-04-01 07:20:54 +00:00
{
2021-04-10 23:33:54 +00:00
if ( auto mark_cache = getContext ( ) - > getMarkCache ( ) )
2017-04-01 07:20:54 +00:00
{
2020-06-10 19:17:30 +00:00
new_values [ " MarkCacheBytes " ] = mark_cache - > weight ( ) ;
new_values [ " MarkCacheFiles " ] = mark_cache - > count ( ) ;
2017-04-01 07:20:54 +00:00
}
}
{
2021-04-10 23:33:54 +00:00
if ( auto uncompressed_cache = getContext ( ) - > getUncompressedCache ( ) )
2017-04-01 07:20:54 +00:00
{
2020-06-10 19:17:30 +00:00
new_values [ " UncompressedCacheBytes " ] = uncompressed_cache - > weight ( ) ;
new_values [ " UncompressedCacheCells " ] = uncompressed_cache - > count ( ) ;
2017-04-01 07:20:54 +00:00
}
}
2021-03-28 01:10:30 +00:00
{
2021-04-10 23:33:54 +00:00
if ( auto mmap_cache = getContext ( ) - > getMMappedFileCache ( ) )
2021-03-28 01:10:30 +00:00
{
new_values [ " MMapCacheCells " ] = mmap_cache - > count ( ) ;
}
}
2018-09-03 10:14:05 +00:00
# if USE_EMBEDDED_COMPILER
{
2021-03-04 17:38:12 +00:00
if ( auto * compiled_expression_cache = CompiledExpressionCacheFactory : : instance ( ) . tryGetCache ( ) )
2021-05-10 08:09:32 +00:00
{
new_values [ " CompiledExpressionCacheBytes " ] = compiled_expression_cache - > weight ( ) ;
2020-06-10 19:17:30 +00:00
new_values [ " CompiledExpressionCacheCount " ] = compiled_expression_cache - > count ( ) ;
2021-05-10 08:09:32 +00:00
}
2018-09-03 10:14:05 +00:00
}
# endif
2021-04-10 23:33:54 +00:00
new_values [ " Uptime " ] = getContext ( ) - > getUptimeSeconds ( ) ;
2017-09-07 04:02:29 +00:00
2021-07-04 03:03:49 +00:00
/// Process process memory usage according to OS
2020-04-17 04:09:41 +00:00
# if defined(OS_LINUX)
{
2020-04-19 20:49:13 +00:00
MemoryStatisticsOS : : Data data = memory_stat . get ( ) ;
2020-06-10 19:17:30 +00:00
new_values [ " MemoryVirtual " ] = data . virt ;
new_values [ " MemoryResident " ] = data . resident ;
new_values [ " MemoryShared " ] = data . shared ;
new_values [ " MemoryCode " ] = data . code ;
new_values [ " MemoryDataAndStack " ] = data . data_and_stack ;
2020-04-19 21:43:06 +00:00
/// We must update the value of total_memory_tracker periodically.
/// Otherwise it might be calculated incorrectly - it can include a "drift" of memory amount.
/// See https://github.com/ClickHouse/ClickHouse/issues/10293
2020-10-30 19:02:02 +00:00
{
Int64 amount = total_memory_tracker . get ( ) ;
Int64 peak = total_memory_tracker . getPeak ( ) ;
2020-12-22 07:13:22 +00:00
Int64 new_amount = data . resident ;
2020-10-30 19:02:02 +00:00
2021-07-16 07:32:02 +00:00
Int64 difference = new_amount - amount ;
/// Log only if difference is high. This is for convenience. The threshold is arbitrary.
if ( difference > = 1048576 | | difference < = - 1048576 )
2021-08-04 19:25:53 +00:00
LOG_TRACE ( log ,
2021-07-16 07:32:02 +00:00
" MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {} " ,
ReadableSize ( amount ) ,
ReadableSize ( peak ) ,
ReadableSize ( new_amount ) ,
ReadableSize ( difference ) ) ;
2020-10-30 19:02:02 +00:00
2020-12-22 07:13:22 +00:00
total_memory_tracker . set ( new_amount ) ;
CurrentMetrics : : set ( CurrentMetrics : : MemoryTracking , new_amount ) ;
2020-10-30 19:02:02 +00:00
}
2020-04-17 04:09:41 +00:00
}
2021-07-04 03:03:49 +00:00
if ( loadavg )
2021-05-22 09:57:51 +00:00
{
2021-07-05 02:55:11 +00:00
try
{
loadavg - > rewind ( ) ;
Float64 loadavg1 = 0 ;
Float64 loadavg5 = 0 ;
Float64 loadavg15 = 0 ;
UInt64 threads_runnable = 0 ;
UInt64 threads_total = 0 ;
readText ( loadavg1 , * loadavg ) ;
skipWhitespaceIfAny ( * loadavg ) ;
readText ( loadavg5 , * loadavg ) ;
skipWhitespaceIfAny ( * loadavg ) ;
readText ( loadavg15 , * loadavg ) ;
skipWhitespaceIfAny ( * loadavg ) ;
readText ( threads_runnable , * loadavg ) ;
assertChar ( ' / ' , * loadavg ) ;
readText ( threads_total , * loadavg ) ;
new_values [ " LoadAverage1 " ] = loadavg1 ;
new_values [ " LoadAverage5 " ] = loadavg5 ;
new_values [ " LoadAverage15 " ] = loadavg15 ;
new_values [ " OSThreadsRunnable " ] = threads_runnable ;
new_values [ " OSThreadsTotal " ] = threads_total ;
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
2021-07-04 03:03:49 +00:00
}
2021-07-04 20:49:36 +00:00
if ( uptime )
{
2021-07-05 02:55:11 +00:00
try
{
uptime - > rewind ( ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
Float64 uptime_seconds = 0 ;
readText ( uptime_seconds , * uptime ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " OSUptime " ] = uptime_seconds ;
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
2021-07-04 20:49:36 +00:00
}
if ( proc_stat )
{
2021-07-05 02:55:11 +00:00
try
{
proc_stat - > rewind ( ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
int64_t hz = sysconf ( _SC_CLK_TCK ) ;
if ( - 1 = = hz )
throwFromErrno ( " Cannot call 'sysconf' to obtain system HZ " , ErrorCodes : : CANNOT_SYSCONF ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
double multiplier = 1.0 / hz / ( std : : chrono : : duration_cast < std : : chrono : : nanoseconds > ( time_after_previous_update ) . count ( ) / 1e9 ) ;
size_t num_cpus = 0 ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
ProcStatValuesOther current_other_values { } ;
ProcStatValuesCPU delta_values_all_cpus { } ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
while ( ! proc_stat - > eof ( ) )
2021-07-04 20:49:36 +00:00
{
2021-07-05 02:55:11 +00:00
String name ;
readStringUntilWhitespace ( name , * proc_stat ) ;
skipWhitespaceIfAny ( * proc_stat ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
if ( name . starts_with ( " cpu " ) )
{
String cpu_num_str = name . substr ( strlen ( " cpu " ) ) ;
UInt64 cpu_num = 0 ;
if ( ! cpu_num_str . empty ( ) )
{
cpu_num = parse < UInt64 > ( cpu_num_str ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
if ( cpu_num > 1000000 ) /// Safety check, arbitrary large number, suitable for supercomputing applications.
throw Exception ( ErrorCodes : : CORRUPTED_DATA , " Too many CPUs (at least {}) in ' / proc / stat ' file " , cpu_num) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
if ( proc_stat_values_per_cpu . size ( ) < = cpu_num )
proc_stat_values_per_cpu . resize ( cpu_num + 1 ) ;
}
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
ProcStatValuesCPU current_values { } ;
current_values . read ( * proc_stat ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
ProcStatValuesCPU & prev_values = ! cpu_num_str . empty ( ) ? proc_stat_values_per_cpu [ cpu_num ] : proc_stat_values_all_cpus ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
if ( ! first_run )
2021-07-04 21:54:46 +00:00
{
2021-07-05 02:55:11 +00:00
ProcStatValuesCPU delta_values = current_values - prev_values ;
String cpu_suffix ;
if ( ! cpu_num_str . empty ( ) )
{
cpu_suffix = " CPU " + cpu_num_str ;
+ + num_cpus ;
}
else
delta_values_all_cpus = delta_values ;
new_values [ " OSUserTime " + cpu_suffix ] = delta_values . user * multiplier ;
new_values [ " OSNiceTime " + cpu_suffix ] = delta_values . nice * multiplier ;
new_values [ " OSSystemTime " + cpu_suffix ] = delta_values . system * multiplier ;
new_values [ " OSIdleTime " + cpu_suffix ] = delta_values . idle * multiplier ;
new_values [ " OSIOWaitTime " + cpu_suffix ] = delta_values . iowait * multiplier ;
new_values [ " OSIrqTime " + cpu_suffix ] = delta_values . irq * multiplier ;
new_values [ " OSSoftIrqTime " + cpu_suffix ] = delta_values . softirq * multiplier ;
new_values [ " OSStealTime " + cpu_suffix ] = delta_values . steal * multiplier ;
new_values [ " OSGuestTime " + cpu_suffix ] = delta_values . guest * multiplier ;
new_values [ " OSGuestNiceTime " + cpu_suffix ] = delta_values . guest_nice * multiplier ;
2021-07-04 21:54:46 +00:00
}
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
prev_values = current_values ;
}
else if ( name = = " intr " )
{
readText ( current_other_values . interrupts , * proc_stat ) ;
skipToNextLineOrEOF ( * proc_stat ) ;
}
else if ( name = = " ctxt " )
{
readText ( current_other_values . context_switches , * proc_stat ) ;
skipToNextLineOrEOF ( * proc_stat ) ;
}
else if ( name = = " processes " )
{
readText ( current_other_values . processes_created , * proc_stat ) ;
skipToNextLineOrEOF ( * proc_stat ) ;
}
else if ( name = = " procs_running " )
{
UInt64 processes_running = 0 ;
readText ( processes_running , * proc_stat ) ;
skipToNextLineOrEOF ( * proc_stat ) ;
new_values [ " OSProcessesRunning " ] = processes_running ;
}
else if ( name = = " procs_blocked " )
{
UInt64 processes_blocked = 0 ;
readText ( processes_blocked , * proc_stat ) ;
skipToNextLineOrEOF ( * proc_stat ) ;
new_values [ " OSProcessesBlocked " ] = processes_blocked ;
}
else
skipToNextLineOrEOF ( * proc_stat ) ;
2021-07-04 20:49:36 +00:00
}
2021-07-05 02:55:11 +00:00
if ( ! first_run )
2021-07-04 20:49:36 +00:00
{
2021-07-05 02:55:11 +00:00
ProcStatValuesOther delta_values = current_other_values - proc_stat_values_other ;
2021-07-08 03:15:30 +00:00
new_values [ " OSInterrupts " ] = delta_values . interrupts ;
new_values [ " OSContextSwitches " ] = delta_values . context_switches ;
new_values [ " OSProcessesCreated " ] = delta_values . processes_created ;
2021-07-05 02:55:11 +00:00
/// Also write values normalized to 0..1 by diving to the number of CPUs.
/// These values are good to be averaged across the cluster of non-uniform servers.
2021-07-05 19:32:33 +00:00
if ( num_cpus )
{
new_values [ " OSUserTimeNormalized " ] = delta_values_all_cpus . user * multiplier / num_cpus ;
new_values [ " OSNiceTimeNormalized " ] = delta_values_all_cpus . nice * multiplier / num_cpus ;
new_values [ " OSSystemTimeNormalized " ] = delta_values_all_cpus . system * multiplier / num_cpus ;
new_values [ " OSIdleTimeNormalized " ] = delta_values_all_cpus . idle * multiplier / num_cpus ;
new_values [ " OSIOWaitTimeNormalized " ] = delta_values_all_cpus . iowait * multiplier / num_cpus ;
new_values [ " OSIrqTimeNormalized " ] = delta_values_all_cpus . irq * multiplier / num_cpus ;
new_values [ " OSSoftIrqTimeNormalized " ] = delta_values_all_cpus . softirq * multiplier / num_cpus ;
new_values [ " OSStealTimeNormalized " ] = delta_values_all_cpus . steal * multiplier / num_cpus ;
new_values [ " OSGuestTimeNormalized " ] = delta_values_all_cpus . guest * multiplier / num_cpus ;
new_values [ " OSGuestNiceTimeNormalized " ] = delta_values_all_cpus . guest_nice * multiplier / num_cpus ;
}
2021-07-04 20:49:36 +00:00
}
2021-07-05 02:55:11 +00:00
proc_stat_values_other = current_other_values ;
}
catch ( . . . )
2021-07-04 20:49:36 +00:00
{
2021-07-05 02:55:11 +00:00
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-07-04 20:49:36 +00:00
}
}
2021-07-04 03:03:49 +00:00
if ( meminfo )
{
2021-07-05 02:55:11 +00:00
try
2021-07-04 20:49:36 +00:00
{
2021-07-05 02:55:11 +00:00
meminfo - > rewind ( ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
uint64_t free_plus_cached_bytes = 0 ;
while ( ! meminfo - > eof ( ) )
2021-07-04 20:49:36 +00:00
{
2021-07-05 02:55:11 +00:00
String name ;
readStringUntilWhitespace ( name , * meminfo ) ;
2021-07-04 21:54:46 +00:00
skipWhitespaceIfAny ( * meminfo , true ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
uint64_t kb = 0 ;
readText ( kb , * meminfo ) ;
2021-08-06 19:33:21 +00:00
if ( ! kb )
2021-07-04 20:49:36 +00:00
{
2021-08-06 19:33:21 +00:00
skipToNextLineOrEOF ( * meminfo ) ;
continue ;
}
2021-07-04 21:33:00 +00:00
2021-08-06 19:33:21 +00:00
skipWhitespaceIfAny ( * meminfo , true ) ;
2021-07-05 02:55:11 +00:00
2021-08-06 19:33:21 +00:00
/**
* Not all entries in / proc / meminfo contain the kB suffix , e . g .
* HugePages_Total : 0
* HugePages_Free : 0
* We simply skip such entries as they ' re not needed
*/
if ( * meminfo - > position ( ) = = ' \n ' )
{
skipToNextLineOrEOF ( * meminfo ) ;
continue ;
}
2021-07-05 02:55:11 +00:00
2021-08-06 19:33:21 +00:00
assertString ( " kB " , * meminfo ) ;
uint64_t bytes = kb * 1024 ;
if ( name = = " MemTotal: " )
{
new_values [ " OSMemoryTotal " ] = bytes ;
}
else if ( name = = " MemFree: " )
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
free_plus_cached_bytes + = bytes ;
new_values [ " OSMemoryFreeWithoutCached " ] = bytes ;
}
else if ( name = = " MemAvailable: " )
{
new_values [ " OSMemoryAvailable " ] = bytes ;
}
else if ( name = = " Buffers: " )
{
new_values [ " OSMemoryBuffers " ] = bytes ;
}
else if ( name = = " Cached: " )
{
free_plus_cached_bytes + = bytes ;
new_values [ " OSMemoryCached " ] = bytes ;
}
else if ( name = = " SwapCached: " )
{
new_values [ " OSMemorySwapCached " ] = bytes ;
2021-07-04 20:49:36 +00:00
}
2021-07-05 02:55:11 +00:00
skipToNextLineOrEOF ( * meminfo ) ;
2021-07-04 20:49:36 +00:00
}
2021-07-05 02:55:11 +00:00
new_values [ " OSMemoryFreePlusCached " ] = free_plus_cached_bytes ;
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-07-04 20:49:36 +00:00
}
2021-05-22 09:57:51 +00:00
}
2021-07-04 20:49:36 +00:00
// Try to add processor frequencies, ignoring errors.
if ( cpuinfo )
2021-05-22 09:57:51 +00:00
{
2021-07-04 20:49:36 +00:00
try
{
cpuinfo - > rewind ( ) ;
// We need the following lines:
// processor : 4
// cpu MHz : 4052.941
// They contain tabs and are interspersed with other info.
int core_id = 0 ;
while ( ! cpuinfo - > eof ( ) )
{
std : : string s ;
// We don't have any backslash escape sequences in /proc/cpuinfo, so
// this function will read the line until EOL, which is exactly what
// we need.
readEscapedStringUntilEOL ( s , * cpuinfo ) ;
// It doesn't read the EOL itself.
+ + cpuinfo - > position ( ) ;
if ( s . rfind ( " processor " , 0 ) = = 0 )
{
if ( auto colon = s . find_first_of ( ' : ' ) )
{
core_id = std : : stoi ( s . substr ( colon + 2 ) ) ;
}
}
else if ( s . rfind ( " cpu MHz " , 0 ) = = 0 )
{
if ( auto colon = s . find_first_of ( ' : ' ) )
{
auto mhz = std : : stod ( s . substr ( colon + 2 ) ) ;
new_values [ fmt : : format ( " CPUFrequencyMHz_{} " , core_id ) ] = mhz ;
}
}
}
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
}
if ( file_nr )
{
2021-07-05 02:55:11 +00:00
try
{
file_nr - > rewind ( ) ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
uint64_t open_files = 0 ;
readText ( open_files , * file_nr ) ;
new_values [ " OSOpenFiles " ] = open_files ;
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
2021-05-22 09:57:51 +00:00
}
2021-08-04 19:25:53 +00:00
/// Update list of block devices periodically
/// (e.g. someone may add a new disk to a RAID array)
if ( block_devices_rescan_delay . elapsedSeconds ( ) > = 300 )
openBlockDevices ( ) ;
2021-08-05 21:33:13 +00:00
try
2021-07-05 02:24:36 +00:00
{
2021-08-05 21:33:13 +00:00
for ( auto & [ name , device ] : block_devs )
2021-07-05 02:55:11 +00:00
{
device - > rewind ( ) ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
BlockDeviceStatValues current_values { } ;
BlockDeviceStatValues & prev_values = block_device_stats [ name ] ;
current_values . read ( * device ) ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
BlockDeviceStatValues delta_values = current_values - prev_values ;
prev_values = current_values ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
if ( first_run )
continue ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
/// Always 512 according to the docs.
static constexpr size_t sector_size = 512 ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
/// Always in milliseconds according to the docs.
static constexpr double time_multiplier = 1e-6 ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " BlockReadOps_ " + name ] = delta_values . read_ios ;
new_values [ " BlockWriteOps_ " + name ] = delta_values . write_ios ;
new_values [ " BlockDiscardOps_ " + name ] = delta_values . discard_ops ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " BlockReadMerges_ " + name ] = delta_values . read_merges ;
new_values [ " BlockWriteMerges_ " + name ] = delta_values . write_merges ;
new_values [ " BlockDiscardMerges_ " + name ] = delta_values . discard_merges ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " BlockReadBytes_ " + name ] = delta_values . read_sectors * sector_size ;
new_values [ " BlockWriteBytes_ " + name ] = delta_values . write_sectors * sector_size ;
new_values [ " BlockDiscardBytes_ " + name ] = delta_values . discard_sectors * sector_size ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " BlockReadTime_ " + name ] = delta_values . read_ticks * time_multiplier ;
new_values [ " BlockWriteTime_ " + name ] = delta_values . write_ticks * time_multiplier ;
new_values [ " BlockDiscardTime_ " + name ] = delta_values . discard_ticks * time_multiplier ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " BlockInFlightOps_ " + name ] = delta_values . in_flight_ios ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " BlockActiveTime_ " + name ] = delta_values . io_ticks * time_multiplier ;
new_values [ " BlockQueueTime_ " + name ] = delta_values . time_in_queue * time_multiplier ;
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
if ( delta_values . in_flight_ios )
{
/// TODO Check if these values are meaningful.
2021-07-05 02:24:36 +00:00
2021-07-05 02:55:11 +00:00
new_values [ " BlockActiveTimePerOp_ " + name ] = delta_values . io_ticks * time_multiplier / delta_values . in_flight_ios ;
new_values [ " BlockQueueTimePerOp_ " + name ] = delta_values . time_in_queue * time_multiplier / delta_values . in_flight_ios ;
}
}
2021-08-05 21:33:13 +00:00
}
catch ( . . . )
{
2021-08-11 07:03:46 +00:00
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-08-05 21:33:13 +00:00
/// Try to reopen block devices in case of error
/// (e.g. ENOENT means that some disk has been replaced, and it may appear with a new name)
try
{
openBlockDevices ( ) ;
}
2021-07-05 02:55:11 +00:00
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-07-05 02:24:36 +00:00
}
}
2021-07-05 02:47:33 +00:00
if ( net_dev )
{
2021-07-05 02:55:11 +00:00
try
{
net_dev - > rewind ( ) ;
2021-07-05 02:47:33 +00:00
2021-07-05 02:55:11 +00:00
/// Skip first two lines:
/// Inter-| Receive | Transmit
/// face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed
2021-07-05 02:47:33 +00:00
2021-07-05 02:55:11 +00:00
skipToNextLineOrEOF ( * net_dev ) ;
skipToNextLineOrEOF ( * net_dev ) ;
2021-07-05 02:47:33 +00:00
2021-07-05 02:55:11 +00:00
while ( ! net_dev - > eof ( ) )
2021-07-05 02:47:33 +00:00
{
2021-07-05 02:55:11 +00:00
skipWhitespaceIfAny ( * net_dev , true ) ;
String interface_name ;
readStringUntilWhitespace ( interface_name , * net_dev ) ;
2021-07-05 02:47:33 +00:00
2021-07-05 02:55:11 +00:00
/// We are not interested in loopback devices.
if ( ! interface_name . ends_with ( ' : ' ) | | interface_name = = " lo: " | | interface_name . size ( ) < = 1 )
{
skipToNextLineOrEOF ( * net_dev ) ;
continue ;
}
2021-07-05 02:47:33 +00:00
2021-07-05 02:55:11 +00:00
interface_name . pop_back ( ) ;
NetworkInterfaceStatValues current_values { } ;
uint64_t unused ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . recv_bytes , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . recv_packets , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . recv_errors , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . recv_drop , * net_dev ) ;
/// NOTE We should pay more attention to the number of fields.
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( unused , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( unused , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( unused , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( unused , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . send_bytes , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . send_packets , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . send_errors , * net_dev ) ;
skipWhitespaceIfAny ( * net_dev , true ) ;
readText ( current_values . send_drop , * net_dev ) ;
2021-07-05 02:47:33 +00:00
2021-07-05 02:55:11 +00:00
skipToNextLineOrEOF ( * net_dev ) ;
2021-07-05 02:47:33 +00:00
2021-07-05 02:55:11 +00:00
NetworkInterfaceStatValues & prev_values = network_interface_stats [ interface_name ] ;
NetworkInterfaceStatValues delta_values = current_values - prev_values ;
prev_values = current_values ;
if ( ! first_run )
{
new_values [ " NetworkReceiveBytes_ " + interface_name ] = delta_values . recv_bytes ;
new_values [ " NetworkReceivePackets_ " + interface_name ] = delta_values . recv_packets ;
new_values [ " NetworkReceiveErrors_ " + interface_name ] = delta_values . recv_errors ;
new_values [ " NetworkReceiveDrop_ " + interface_name ] = delta_values . recv_drop ;
new_values [ " NetworkSendBytes_ " + interface_name ] = delta_values . send_bytes ;
new_values [ " NetworkSendPackets_ " + interface_name ] = delta_values . send_packets ;
new_values [ " NetworkSendErrors_ " + interface_name ] = delta_values . send_errors ;
new_values [ " NetworkSendDrop_ " + interface_name ] = delta_values . send_drop ;
}
2021-07-05 02:47:33 +00:00
}
}
2021-07-05 02:55:11 +00:00
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
2021-07-05 02:47:33 +00:00
}
2021-08-11 07:09:00 +00:00
try
2021-07-04 21:22:58 +00:00
{
2021-08-11 07:09:00 +00:00
for ( size_t i = 0 , size = thermal . size ( ) ; i < size ; + + i )
2021-07-05 02:55:11 +00:00
{
2021-07-12 01:12:34 +00:00
ReadBufferFromFilePRead & in = * thermal [ i ] ;
2021-07-04 20:49:36 +00:00
2021-07-05 02:55:11 +00:00
in . rewind ( ) ;
2021-07-07 09:09:38 +00:00
Int64 temperature = 0 ;
2021-07-05 02:55:11 +00:00
readText ( temperature , in ) ;
new_values [ fmt : : format ( " Temperature{} " , i ) ] = temperature * 0.001 ;
}
2021-08-11 07:09:00 +00:00
}
catch ( . . . )
{
2021-09-11 07:03:06 +00:00
if ( errno ! = ENODATA ) /// Ok for thermal sensors.
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-08-11 07:09:00 +00:00
/// Files may be re-created on module load/unload
try
{
openSensors ( ) ;
}
2021-07-05 02:55:11 +00:00
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
2021-07-04 21:22:58 +00:00
}
2021-07-05 00:40:28 +00:00
2021-08-11 06:54:55 +00:00
try
2021-07-05 00:40:28 +00:00
{
2021-08-11 06:54:55 +00:00
for ( const auto & [ hwmon_name , sensors ] : hwmon_devices )
2021-07-05 00:40:28 +00:00
{
2021-07-05 02:55:11 +00:00
for ( const auto & [ sensor_name , sensor_file ] : sensors )
{
sensor_file - > rewind ( ) ;
2021-07-07 09:09:38 +00:00
Int64 temperature = 0 ;
2021-07-30 14:36:28 +00:00
try
{
readText ( temperature , * sensor_file ) ;
}
2021-07-30 17:03:57 +00:00
catch ( const ErrnoException & e )
2021-07-30 14:36:28 +00:00
{
LOG_DEBUG ( & Poco : : Logger : : get ( " AsynchronousMetrics " ) , " Hardware monitor '{}', sensor '{}' exists but could not be read, error {}. " , hwmon_name , sensor_name , e . getErrno ( ) ) ;
}
2021-07-05 00:56:14 +00:00
2021-07-05 02:55:11 +00:00
if ( sensor_name . empty ( ) )
new_values [ fmt : : format ( " Temperature_{} " , hwmon_name ) ] = temperature * 0.001 ;
else
new_values [ fmt : : format ( " Temperature_{}_{} " , hwmon_name , sensor_name ) ] = temperature * 0.001 ;
}
}
2021-08-11 06:54:55 +00:00
}
catch ( . . . )
{
2021-09-11 07:03:06 +00:00
if ( errno ! = ENODATA ) /// Ok for thermal sensors.
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-08-11 06:54:55 +00:00
/// Files can be re-created on:
/// - module load/unload
/// - suspend/resume cycle
/// So file descriptors should be reopened.
try
{
openSensorsChips ( ) ;
}
2021-07-05 02:55:11 +00:00
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-07-05 00:40:28 +00:00
}
}
2021-07-05 01:18:12 +00:00
2021-08-11 07:02:34 +00:00
try
2021-07-05 01:18:12 +00:00
{
2021-08-11 07:02:34 +00:00
for ( size_t i = 0 , size = edac . size ( ) ; i < size ; + + i )
2021-07-05 01:18:12 +00:00
{
2021-08-11 07:02:34 +00:00
/// NOTE maybe we need to take difference with previous values.
/// But these metrics should be exceptionally rare, so it's ok to keep them accumulated.
2021-07-05 02:55:11 +00:00
if ( edac [ i ] . first )
{
2021-07-12 01:12:34 +00:00
ReadBufferFromFilePRead & in = * edac [ i ] . first ;
2021-07-05 02:55:11 +00:00
in . rewind ( ) ;
uint64_t errors = 0 ;
readText ( errors , in ) ;
new_values [ fmt : : format ( " EDAC{}_Correctable " , i ) ] = errors ;
}
2021-07-05 01:18:12 +00:00
2021-07-05 02:55:11 +00:00
if ( edac [ i ] . second )
{
2021-07-12 01:12:34 +00:00
ReadBufferFromFilePRead & in = * edac [ i ] . second ;
2021-07-05 02:55:11 +00:00
in . rewind ( ) ;
uint64_t errors = 0 ;
readText ( errors , in ) ;
new_values [ fmt : : format ( " EDAC{}_Uncorrectable " , i ) ] = errors ;
}
}
2021-08-11 07:02:34 +00:00
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
/// EDAC files can be re-created on module load/unload
try
{
openEDAC ( ) ;
}
2021-07-05 02:55:11 +00:00
catch ( . . . )
2021-07-05 01:18:12 +00:00
{
2021-07-05 02:55:11 +00:00
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2021-07-05 01:18:12 +00:00
}
}
2021-07-04 21:22:58 +00:00
# endif
2021-07-04 20:49:36 +00:00
2021-07-04 21:22:58 +00:00
/// Free space in filesystems at data path and logs path.
2021-05-22 09:57:51 +00:00
{
2021-07-04 21:22:58 +00:00
auto stat = getStatVFS ( getContext ( ) - > getPath ( ) ) ;
new_values [ " FilesystemMainPathTotalBytes " ] = stat . f_blocks * stat . f_bsize ;
new_values [ " FilesystemMainPathAvailableBytes " ] = stat . f_bavail * stat . f_bsize ;
new_values [ " FilesystemMainPathUsedBytes " ] = ( stat . f_blocks - stat . f_bavail ) * stat . f_bsize ;
new_values [ " FilesystemMainPathTotalINodes " ] = stat . f_files ;
new_values [ " FilesystemMainPathAvailableINodes " ] = stat . f_favail ;
new_values [ " FilesystemMainPathUsedINodes " ] = stat . f_files - stat . f_favail ;
}
2021-05-22 09:57:51 +00:00
2021-07-04 21:22:58 +00:00
{
2021-07-05 02:24:36 +00:00
/// Current working directory of the server is the directory with logs.
2021-07-04 21:22:58 +00:00
auto stat = getStatVFS ( " . " ) ;
new_values [ " FilesystemLogsPathTotalBytes " ] = stat . f_blocks * stat . f_bsize ;
new_values [ " FilesystemLogsPathAvailableBytes " ] = stat . f_bavail * stat . f_bsize ;
new_values [ " FilesystemLogsPathUsedBytes " ] = ( stat . f_blocks - stat . f_bavail ) * stat . f_bsize ;
new_values [ " FilesystemLogsPathTotalINodes " ] = stat . f_files ;
new_values [ " FilesystemLogsPathAvailableINodes " ] = stat . f_favail ;
new_values [ " FilesystemLogsPathUsedINodes " ] = stat . f_files - stat . f_favail ;
}
/// Free and total space on every configured disk.
{
DisksMap disks_map = getContext ( ) - > getDisksMap ( ) ;
for ( const auto & [ name , disk ] : disks_map )
{
auto total = disk - > getTotalSpace ( ) ;
2021-07-04 21:54:46 +00:00
/// Some disks don't support information about the space.
if ( ! total )
continue ;
2021-07-04 21:22:58 +00:00
auto available = disk - > getAvailableSpace ( ) ;
auto unreserved = disk - > getUnreservedSpace ( ) ;
new_values [ fmt : : format ( " DiskTotal_{} " , name ) ] = total ;
new_values [ fmt : : format ( " DiskUsed_{} " , name ) ] = total - available ;
new_values [ fmt : : format ( " DiskAvailable_{} " , name ) ] = available ;
new_values [ fmt : : format ( " DiskUnreserved_{} " , name ) ] = unreserved ;
}
2021-05-22 09:57:51 +00:00
}
2017-04-01 07:20:54 +00:00
{
2020-02-10 13:10:17 +00:00
auto databases = DatabaseCatalog : : instance ( ) . getDatabases ( ) ;
2017-04-01 07:20:54 +00:00
size_t max_queue_size = 0 ;
size_t max_inserts_in_queue = 0 ;
size_t max_merges_in_queue = 0 ;
size_t sum_queue_size = 0 ;
size_t sum_inserts_in_queue = 0 ;
size_t sum_merges_in_queue = 0 ;
size_t max_absolute_delay = 0 ;
size_t max_relative_delay = 0 ;
size_t max_part_count_for_partition = 0 ;
2019-07-17 15:36:28 +00:00
size_t number_of_databases = databases . size ( ) ;
size_t total_number_of_tables = 0 ;
2020-12-22 10:34:35 +00:00
size_t total_number_of_bytes = 0 ;
size_t total_number_of_rows = 0 ;
size_t total_number_of_parts = 0 ;
2017-04-01 07:20:54 +00:00
for ( const auto & db : databases )
{
2020-10-15 15:57:17 +00:00
/// Check if database can contain MergeTree tables
if ( ! db . second - > canContainMergeTreeTables ( ) )
2019-10-01 12:44:17 +00:00
continue ;
2021-07-04 22:41:09 +00:00
2021-04-10 23:33:54 +00:00
for ( auto iterator = db . second - > getTablesIterator ( getContext ( ) ) ; iterator - > isValid ( ) ; iterator - > next ( ) )
2017-04-01 07:20:54 +00:00
{
2019-07-17 15:36:28 +00:00
+ + total_number_of_tables ;
2020-04-22 06:01:33 +00:00
const auto & table = iterator - > table ( ) ;
2020-06-02 02:06:16 +00:00
if ( ! table )
continue ;
2021-07-04 22:41:09 +00:00
if ( MergeTreeData * table_merge_tree = dynamic_cast < MergeTreeData * > ( table . get ( ) ) )
{
const auto & settings = getContext ( ) - > getSettingsRef ( ) ;
2017-04-01 07:20:54 +00:00
2021-07-04 22:41:09 +00:00
calculateMax ( max_part_count_for_partition , table_merge_tree - > getMaxPartsCountForPartition ( ) ) ;
total_number_of_bytes + = table_merge_tree - > totalBytes ( settings ) . value ( ) ;
total_number_of_rows + = table_merge_tree - > totalRows ( settings ) . value ( ) ;
total_number_of_parts + = table_merge_tree - > getPartsCount ( ) ;
}
2021-07-11 01:01:23 +00:00
if ( StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast < StorageReplicatedMergeTree * > ( table . get ( ) ) )
2017-04-01 07:20:54 +00:00
{
StorageReplicatedMergeTree : : Status status ;
table_replicated_merge_tree - > getStatus ( status , false ) ;
calculateMaxAndSum ( max_queue_size , sum_queue_size , status . queue . queue_size ) ;
calculateMaxAndSum ( max_inserts_in_queue , sum_inserts_in_queue , status . queue . inserts_in_queue ) ;
calculateMaxAndSum ( max_merges_in_queue , sum_merges_in_queue , status . queue . merges_in_queue ) ;
2019-08-17 21:18:22 +00:00
if ( ! status . is_readonly )
2017-04-01 07:20:54 +00:00
{
2019-08-17 21:18:22 +00:00
try
{
time_t absolute_delay = 0 ;
time_t relative_delay = 0 ;
table_replicated_merge_tree - > getReplicaDelays ( absolute_delay , relative_delay ) ;
calculateMax ( max_absolute_delay , absolute_delay ) ;
calculateMax ( max_relative_delay , relative_delay ) ;
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ,
" Cannot get replica delay for table: " + backQuoteIfNeed ( db . first ) + " . " + backQuoteIfNeed ( iterator - > name ( ) ) ) ;
}
2017-04-01 07:20:54 +00:00
}
}
}
}
2020-06-10 19:17:30 +00:00
new_values [ " ReplicasMaxQueueSize " ] = max_queue_size ;
new_values [ " ReplicasMaxInsertsInQueue " ] = max_inserts_in_queue ;
new_values [ " ReplicasMaxMergesInQueue " ] = max_merges_in_queue ;
2017-04-01 07:20:54 +00:00
2020-06-10 19:17:30 +00:00
new_values [ " ReplicasSumQueueSize " ] = sum_queue_size ;
new_values [ " ReplicasSumInsertsInQueue " ] = sum_inserts_in_queue ;
new_values [ " ReplicasSumMergesInQueue " ] = sum_merges_in_queue ;
2017-04-01 07:20:54 +00:00
2020-06-10 19:17:30 +00:00
new_values [ " ReplicasMaxAbsoluteDelay " ] = max_absolute_delay ;
new_values [ " ReplicasMaxRelativeDelay " ] = max_relative_delay ;
2017-04-01 07:20:54 +00:00
2020-06-10 19:17:30 +00:00
new_values [ " MaxPartCountForPartition " ] = max_part_count_for_partition ;
2019-07-17 15:36:28 +00:00
2020-06-10 19:17:30 +00:00
new_values [ " NumberOfDatabases " ] = number_of_databases ;
new_values [ " NumberOfTables " ] = total_number_of_tables ;
2020-12-17 13:47:03 +00:00
2020-12-22 10:34:35 +00:00
new_values [ " TotalBytesOfMergeTreeTables " ] = total_number_of_bytes ;
new_values [ " TotalRowsOfMergeTreeTables " ] = total_number_of_rows ;
new_values [ " TotalPartsOfMergeTreeTables " ] = total_number_of_parts ;
2020-12-17 13:47:03 +00:00
auto get_metric_name = [ ] ( const String & name ) - > const char *
{
2021-07-04 22:41:09 +00:00
static std : : map < String , const char * > metric_map =
{
2020-12-17 13:47:03 +00:00
{ " tcp_port " , " TCPThreads " } ,
{ " tcp_port_secure " , " TCPSecureThreads " } ,
{ " http_port " , " HTTPThreads " } ,
{ " https_port " , " HTTPSecureThreads " } ,
{ " interserver_http_port " , " InterserverThreads " } ,
{ " interserver_https_port " , " InterserverSecureThreads " } ,
{ " mysql_port " , " MySQLThreads " } ,
{ " postgresql_port " , " PostgreSQLThreads " } ,
{ " grpc_port " , " GRPCThreads " } ,
{ " prometheus.port " , " PrometheusThreads " }
} ;
auto it = metric_map . find ( name ) ;
if ( it = = metric_map . end ( ) )
return nullptr ;
else
return it - > second ;
} ;
2020-12-21 21:47:10 +00:00
if ( servers_to_start_before_tables )
2020-12-17 13:47:03 +00:00
{
2020-12-21 21:47:10 +00:00
for ( const auto & server : * servers_to_start_before_tables )
{
if ( const auto * name = get_metric_name ( server . getPortName ( ) ) )
new_values [ name ] = server . currentThreads ( ) ;
}
2020-12-17 13:47:03 +00:00
}
2020-12-21 21:47:10 +00:00
if ( servers )
2020-12-17 13:47:03 +00:00
{
2020-12-21 21:47:10 +00:00
for ( const auto & server : * servers )
{
if ( const auto * name = get_metric_name ( server . getPortName ( ) ) )
new_values [ name ] = server . currentThreads ( ) ;
}
2020-12-17 13:47:03 +00:00
}
2017-04-01 07:20:54 +00:00
}
2016-10-23 06:12:50 +00:00
2020-04-16 12:31:57 +00:00
# if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
2020-06-18 01:54:10 +00:00
// 'epoch' is a special mallctl -- it updates the statistics. Without it, all
// the following calls will return stale values. It increments and returns
// the current epoch number, which might be useful to log as a sanity check.
auto epoch = updateJemallocEpoch ( ) ;
new_values [ " jemalloc.epoch " ] = epoch ;
// Collect the statistics themselves.
saveJemallocMetric < size_t > ( new_values , " allocated " ) ;
saveJemallocMetric < size_t > ( new_values , " active " ) ;
saveJemallocMetric < size_t > ( new_values , " metadata " ) ;
saveJemallocMetric < size_t > ( new_values , " metadata_thp " ) ;
saveJemallocMetric < size_t > ( new_values , " resident " ) ;
saveJemallocMetric < size_t > ( new_values , " mapped " ) ;
saveJemallocMetric < size_t > ( new_values , " retained " ) ;
saveJemallocMetric < size_t > ( new_values , " background_thread.num_threads " ) ;
saveJemallocMetric < uint64_t > ( new_values , " background_thread.num_runs " ) ;
saveJemallocMetric < uint64_t > ( new_values , " background_thread.run_intervals " ) ;
saveAllArenasMetric < size_t > ( new_values , " pactive " ) ;
saveAllArenasMetric < size_t > ( new_values , " pdirty " ) ;
saveAllArenasMetric < size_t > ( new_values , " pmuzzy " ) ;
saveAllArenasMetric < size_t > ( new_values , " dirty_purged " ) ;
saveAllArenasMetric < size_t > ( new_values , " muzzy_purged " ) ;
2018-08-02 00:20:20 +00:00
# endif
2017-04-01 07:20:54 +00:00
/// Add more metrics as you wish.
2020-06-10 19:17:30 +00:00
2021-07-04 22:33:32 +00:00
new_values [ " AsynchronousMetricsCalculationTimeSpent " ] = watch . elapsedSeconds ( ) ;
/// Log the new metrics.
2021-08-04 19:25:53 +00:00
if ( auto asynchronous_metric_log = getContext ( ) - > getAsynchronousMetricLog ( ) )
2020-06-10 19:17:30 +00:00
{
2021-08-04 19:25:53 +00:00
asynchronous_metric_log - > addValues ( new_values ) ;
2020-06-10 19:17:30 +00:00
}
2021-07-04 20:49:36 +00:00
first_run = false ;
2020-06-10 19:17:30 +00:00
// Finally, update the current metrics.
std : : lock_guard lock ( mutex ) ;
values = new_values ;
2016-10-23 06:12:50 +00:00
}
}