From dda4d0e34e975e248848dcbd08956e675d06825d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jul 2021 04:18:12 +0300 Subject: [PATCH] Add EDAC --- src/Interpreters/AsynchronousMetrics.cpp | 47 ++++++++++++++++++++++++ src/Interpreters/AsynchronousMetrics.h | 8 +++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 014ac4639c0..c2184f78d6f 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -109,6 +109,7 @@ AsynchronousMetrics::AsynchronousMetrics( String hwmon_name; ReadBufferFromFile hwmon_name_in(hwmon_name_file, small_buffer_size); readText(hwmon_name, hwmon_name_in); + std::replace(hwmon_name.begin(), hwmon_name.end(), ' ', '_'); for (size_t sensor_index = 0;; ++sensor_index) { @@ -136,11 +137,36 @@ AsynchronousMetrics::AsynchronousMetrics( { ReadBufferFromFile sensor_name_in(sensor_name_file, small_buffer_size); readText(sensor_name, sensor_name_in); + std::replace(sensor_name.begin(), sensor_name.end(), ' ', '_'); } hwmon_devices[hwmon_name][sensor_name] = std::move(file); } } + + for (size_t edac_index = 0;; ++edac_index) + { + String edac_correctable_file = fmt::format("/sys/devices/system/edac/mc/mc{}/ce_count", edac_index); + String edac_uncorrectable_file = fmt::format("/sys/devices/system/edac/mc/mc{}/ue_count", edac_index); + + bool edac_correctable_file_exists = std::filesystem::exists(edac_correctable_file); + bool edac_uncorrectable_file_exists = std::filesystem::exists(edac_uncorrectable_file); + + if (!edac_correctable_file_exists && !edac_uncorrectable_file_exists) + { + if (edac_index == 0) + continue; + else + break; + } + + edac.emplace_back(); + + if (edac_correctable_file_exists) + edac.back().first = openFileIfExists(edac_correctable_file); + if (edac_uncorrectable_file_exists) + edac.back().second = openFileIfExists(edac_uncorrectable_file); + } #endif } @@ -741,6 +767,27 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti new_values[fmt::format("Temperature_{}_{}", hwmon_name, sensor_name)] = temperature * 0.001; } } + + for (size_t i = 0, size = edac.size(); i < size; ++i) + { + if (edac[i].first) + { + ReadBufferFromFile & in = *edac[i].first; + in.rewind(); + uint64_t errors = 0; + readText(errors, in); + new_values[fmt::format("EDAC{}_Correctable", i)] = errors; + } + + if (edac[i].second) + { + ReadBufferFromFile & in = *edac[i].second; + in.rewind(); + uint64_t errors = 0; + readText(errors, in); + new_values[fmt::format("EDAC{}_Uncorrectable", i)] = errors; + } + } #endif /// Free space in filesystems at data path and logs path. diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 32226d5d25b..8c0a00df2a3 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -84,13 +85,18 @@ private: std::optional cpuinfo; std::optional file_nr; std::optional uptime; + std::vector> thermal; std::unordered_map>> hwmon_devices; - /// TODO: IO load, Network rx/tx, sockets, EDAC. + std::vector /* correctable errors */, + std::unique_ptr /* uncorrectable errors */>> edac; + + /// TODO: IO load, Network rx/tx, sockets. struct ProcStatValuesCPU {