From ba05b7dd2c2218b1e63d701c5b762e2ace26f1b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Nov 2022 03:36:20 +0100 Subject: [PATCH] Add documentation for AsynchronousMetrics --- programs/server/MetricsTransmitter.cpp | 2 +- src/Interpreters/AsynchronousMetricLog.cpp | 2 +- src/Interpreters/AsynchronousMetricLog.h | 5 +- src/Interpreters/AsynchronousMetrics.cpp | 56 ++++++++++++------- src/Interpreters/AsynchronousMetrics.h | 1 + src/Server/PrometheusMetricsWriter.cpp | 7 ++- src/Server/PrometheusMetricsWriter.h | 2 +- .../StorageSystemAsynchronousMetrics.cpp | 4 +- 8 files changed, 51 insertions(+), 28 deletions(-) diff --git a/programs/server/MetricsTransmitter.cpp b/programs/server/MetricsTransmitter.cpp index 8ad519ba5aa..f7829a49a39 100644 --- a/programs/server/MetricsTransmitter.cpp +++ b/programs/server/MetricsTransmitter.cpp @@ -123,7 +123,7 @@ void MetricsTransmitter::transmit(std::vector & prev_count { for (const auto & name_value : async_metrics_values) { - key_vals.emplace_back(asynchronous_metrics_path_prefix + name_value.first, name_value.second); + key_vals.emplace_back(asynchronous_metrics_path_prefix + name_value.first, name_value.second.value); } } diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index 228934d5f4d..6176bb781ab 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -47,7 +47,7 @@ void AsynchronousMetricLog::addValues(const AsynchronousMetricValues & values) for (const auto & [key, value] : values) { element.metric_name = key; - element.value = round(value * precision) / precision; + element.value = round(value.value * precision) / precision; add(element); } diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 900d84868bd..8a19fae29e9 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -14,12 +15,8 @@ namespace DB { -using AsynchronousMetricValue = double; -using AsynchronousMetricValues = std::unordered_map; - /** AsynchronousMetricLog is a log of metric values measured at regular time interval. */ - struct AsynchronousMetricLogElement { UInt16 event_date; diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index f861fbb8426..6e8d745ef0e 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -394,7 +394,7 @@ static Value saveJemallocMetricImpl( Value value{}; size_t size = sizeof(value); mallctl(jemalloc_full_name.c_str(), &value, &size, nullptr, 0); - values[clickhouse_full_name] = { value, "An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html" }; + values[clickhouse_full_name] = AsynchronousMetricValue(value, "An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html"); return value; } @@ -1482,7 +1482,8 @@ void AsynchronousMetrics::update(TimePoint update_time) in.rewind(); uint64_t errors = 0; readText(errors, in); - new_values[fmt::format("EDAC{}_Correctable", i)] = { errors, "The number of correctable ECC memory errors", + new_values[fmt::format("EDAC{}_Correctable", i)] = { errors, + "The number of correctable ECC memory errors." " A high number of this value indicates bad RAM which has to be immediately replaced," " because in presence of a high number of corrected errors, a number of silent errors may happen as well, leading to data corruption." }; } @@ -1493,7 +1494,8 @@ void AsynchronousMetrics::update(TimePoint update_time) in.rewind(); uint64_t errors = 0; readText(errors, in); - new_values[fmt::format("EDAC{}_Uncorrectable", i)] = { errors, "The number of uncorrectable ECC memory errors", + new_values[fmt::format("EDAC{}_Uncorrectable", i)] = { errors, + "The number of uncorrectable ECC memory errors." " A non-zero number of this value indicates bad RAM which has to be immediately replaced," " because it indicates potential data corruption." }; } @@ -1519,24 +1521,36 @@ void AsynchronousMetrics::update(TimePoint update_time) { auto stat = getStatVFS(getContext()->getPath()); - new_values["FilesystemMainPathTotalBytes"] = stat.f_blocks * stat.f_frsize; - new_values["FilesystemMainPathAvailableBytes"] = stat.f_bavail * stat.f_frsize; - new_values["FilesystemMainPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_frsize; - new_values["FilesystemMainPathTotalINodes"] = stat.f_files; - new_values["FilesystemMainPathAvailableINodes"] = stat.f_favail; - new_values["FilesystemMainPathUsedINodes"] = stat.f_files - stat.f_favail; + new_values["FilesystemMainPathTotalBytes"] = { stat.f_blocks * stat.f_frsize, + "The size of the volume where the main ClickHouse path is mounted, in bytes." }; + new_values["FilesystemMainPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize, + "Available bytes on the volume where the main ClickHouse path is mounted." }; + new_values["FilesystemMainPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize, + "Used bytes on the volume where the main ClickHouse path is mounted." }; + new_values["FilesystemMainPathTotalINodes"] = { stat.f_files, + "The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration." }; + new_values["FilesystemMainPathAvailableINodes"] = { stat.f_favail, + "The number of available inodes on the volume where the main ClickHouse path is mounted. If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full." }; + new_values["FilesystemMainPathUsedINodes"] = { stat.f_files - stat.f_favail, + "The number of used inodes on the volume where the main ClickHouse path is mounted. This value mostly corresponds to the number of files." }; } { /// Current working directory of the server is the directory with logs. auto stat = getStatVFS("."); - new_values["FilesystemLogsPathTotalBytes"] = stat.f_blocks * stat.f_frsize; - new_values["FilesystemLogsPathAvailableBytes"] = stat.f_bavail * stat.f_frsize; - new_values["FilesystemLogsPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_frsize; - new_values["FilesystemLogsPathTotalINodes"] = stat.f_files; - new_values["FilesystemLogsPathAvailableINodes"] = stat.f_favail; - new_values["FilesystemLogsPathUsedINodes"] = stat.f_files - stat.f_favail; + new_values["FilesystemLogsPathTotalBytes"] = { stat.f_blocks * stat.f_frsize, + "The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs." }; + new_values["FilesystemLogsPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize, + "Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file." }; + new_values["FilesystemLogsPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize, + "Used bytes on the volume where ClickHouse logs path is mounted." }; + new_values["FilesystemLogsPathTotalINodes"] = { stat.f_files, + "The total number of inodes on the volume where ClickHouse logs path is mounted." }; + new_values["FilesystemLogsPathAvailableINodes"] = { stat.f_favail, + "The number of available inodes on the volume where ClickHouse logs path is mounted." }; + new_values["FilesystemLogsPathUsedINodes"] = { stat.f_files - stat.f_favail, + "The number of used inodes on the volume where ClickHouse logs path is mounted." }; } /// Free and total space on every configured disk. @@ -1553,10 +1567,14 @@ void AsynchronousMetrics::update(TimePoint update_time) auto available = disk->getAvailableSpace(); auto unreserved = disk->getUnreservedSpace(); - new_values[fmt::format("DiskTotal_{}", name)] = total; - new_values[fmt::format("DiskUsed_{}", name)] = total - available; - new_values[fmt::format("DiskAvailable_{}", name)] = available; - new_values[fmt::format("DiskUnreserved_{}", name)] = unreserved; + new_values[fmt::format("DiskTotal_{}", name)] = { total, + "The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; + new_values[fmt::format("DiskUsed_{}", name)] = { total - available, + "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; + new_values[fmt::format("DiskAvailable_{}", name)] = { available, + "Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; + new_values[fmt::format("DiskUnreserved_{}", name)] = { unreserved, + "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB." }; } } diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 32a82bc106e..3529c162944 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -33,6 +33,7 @@ struct AsynchronousMetricValue AsynchronousMetricValue(double value_, const char * documentation_) : value(value_), documentation(documentation_) {} AsynchronousMetricValue(size_t value_, const char * documentation_) : value(value_), documentation(documentation_) {} + AsynchronousMetricValue() = default; /// For std::unordered_map::operator[]. }; using AsynchronousMetricValues = std::unordered_map; diff --git a/src/Server/PrometheusMetricsWriter.cpp b/src/Server/PrometheusMetricsWriter.cpp index 9168eb5f24d..843d1e64463 100644 --- a/src/Server/PrometheusMetricsWriter.cpp +++ b/src/Server/PrometheusMetricsWriter.cpp @@ -108,11 +108,16 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const if (!replaceInvalidChars(key)) continue; + auto value = name_value.second; + std::string metric_doc{value.documentation}; + convertHelpToSingleLine(metric_doc); + // TODO: add HELP section? asynchronous_metrics contains only key and value + writeOutLine(wb, "# HELP", key, metric_doc); writeOutLine(wb, "# TYPE", key, "gauge"); - writeOutLine(wb, key, value); + writeOutLine(wb, key, value.value); } } diff --git a/src/Server/PrometheusMetricsWriter.h b/src/Server/PrometheusMetricsWriter.h index 4422ced625e..0c2dde1f66f 100644 --- a/src/Server/PrometheusMetricsWriter.h +++ b/src/Server/PrometheusMetricsWriter.h @@ -3,11 +3,11 @@ #include #include - #include #include + namespace DB { diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp index 70e12440678..e2f62b902b7 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp @@ -12,6 +12,7 @@ NamesAndTypesList StorageSystemAsynchronousMetrics::getNamesAndTypes() return { {"metric", std::make_shared()}, {"value", std::make_shared()}, + {"description", std::make_shared()}, }; } @@ -27,7 +28,8 @@ void StorageSystemAsynchronousMetrics::fillData(MutableColumns & res_columns, Co for (const auto & name_value : async_metrics_values) { res_columns[0]->insert(name_value.first); - res_columns[1]->insert(name_value.second); + res_columns[1]->insert(name_value.second.value); + res_columns[2]->insert(name_value.second.documentation); } }