ClickHouse/tools/clickhouse-diagnostics/internal/collectors/system/system.go
2022-04-27 13:22:20 +01:00

235 lines
7.9 KiB
Go

package system
import (
"github.com/ClickHouse/clickhouse-diagnostics/internal/collectors"
"github.com/ClickHouse/clickhouse-diagnostics/internal/platform"
"github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config"
"github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data"
"github.com/elastic/gosigar"
"github.com/jaypipes/ghw"
"github.com/matishsiao/goInfo"
"github.com/pkg/errors"
"strings"
)
// SystemCollector collects an overview of the host system. Its Collect method
// gathers disk, memory, CPU, process and OS frames into a single bundle.
type SystemCollector struct {
// resourceManager is supplied at construction time. None of the methods
// visible in this file read it — NOTE(review): confirm whether it is still needed.
resourceManager *platform.ResourceManager
}
// NewSystemCollector returns a SystemCollector that holds the given resource
// manager.
func NewSystemCollector(m *platform.ResourceManager) *SystemCollector {
	collector := new(SystemCollector)
	collector.resourceManager = m
	return collector
}
// Collect validates conf against this collector's Configuration and then
// gathers the system frames (disks, disk_usage, memory, memory_usage, cpu,
// processes, os) into a DiagnosticBundle.
//
// A configuration validation failure is returned as the error together with
// an empty bundle. Failures of individual frame collectors do not fail the
// call; they are accumulated in the bundle's Errors field.
func (sc *SystemCollector) Collect(conf config.Configuration) (*data.DiagnosticBundle, error) {
	if _, err := conf.ValidateConfig(sc.Configuration()); err != nil {
		return &data.DiagnosticBundle{}, err
	}

	// Frame key -> function producing that frame, run in this order.
	stats := []struct {
		name     string
		statFunc func() (data.MemoryFrame, error)
	}{
		{"disks", getDisk},
		{"disk_usage", getDiskUsage},
		{"memory", getMemory},
		{"memory_usage", getMemoryUsage},
		{"cpu", getCPU},
		// {"cpu_usage", getCPUUsage}, // disabled: not currently used
		{"processes", getProcessList},
		{"os", getHostDetails},
	}

	frames := make(map[string]data.Frame)
	var frameErrors []error
	for _, stat := range stats {
		frameErrors = addStatsToFrame(frames, frameErrors, stat.name, stat.statFunc)
	}

	bundle := &data.DiagnosticBundle{
		Frames: frames,
		Errors: data.FrameErrors{
			Errors: frameErrors,
		},
	}
	return bundle, nil
}
// addStatsToFrame runs statFunc and stores the frame it returns in frames
// under name. The frame is stored even when statFunc reports an error; in that
// case the error is appended to errors. The (possibly extended) error slice is
// returned.
func addStatsToFrame(frames map[string]data.Frame, errors []error, name string, statFunc func() (data.MemoryFrame, error)) []error {
	frame, statErr := statFunc()
	frames[name] = frame
	if statErr == nil {
		return errors
	}
	return append(errors, statErr)
}
// Configuration returns the configuration accepted by this collector: an
// empty (but non-nil) parameter list.
func (sc *SystemCollector) Configuration() config.Configuration {
	var conf config.Configuration
	conf.Params = []config.ConfigParam{}
	return conf
}
// IsDefault always reports true.
// NOTE(review): presumably this marks the collector as one that runs by
// default — confirm against the collectors package.
func (sc *SystemCollector) IsDefault() bool {
return true
}
// getDisk lists block storage via ghw and returns a frame with one row per
// (disk, partition) pair, so a disk without partitions contributes no rows.
//
// NOTE(review): the frame is named "disk_usage" — the same name getDiskUsage
// gives its frame — while Collect stores it under the key "disks". Confirm
// whether this name is intentional before changing it.
func getDisk() (data.MemoryFrame, error) {
	blockInfo, err := ghw.Block()
	if err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to list block storage")
	}

	columns := []string{"name", "size", "physicalBlockSize", "driveType", "controller", "vendor", "model", "partitionName", "partitionSize", "mountPoint", "readOnly"}

	var rows [][]interface{}
	for _, d := range blockInfo.Disks {
		for _, p := range d.Partitions {
			row := []interface{}{
				d.Name, d.SizeBytes, d.PhysicalBlockSizeBytes, d.DriveType, d.StorageController, d.Vendor, d.Model,
				p.Name, p.SizeBytes, p.MountPoint, p.IsReadOnly,
			}
			rows = append(rows, row)
		}
	}
	return data.NewMemoryFrame("disk_usage", columns, rows), nil
}
// getDiskUsage lists filesystems via gosigar and returns the "disk_usage"
// frame with one row per filesystem: device, size, used, available, use
// percentage and mount directory.
//
// Failing to list the filesystems is an error. Failing to read the usage of a
// single filesystem is not: that filesystem still gets a row, with zeros in
// place of the usage figures.
func getDiskUsage() (data.MemoryFrame, error) {
	var fileSystems gosigar.FileSystemList
	if err := fileSystems.Get(); err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to list filesystems for usage")
	}

	columns := []string{"filesystem", "size", "used", "avail", "use%", "mounted on"}
	rows := make([][]interface{}, 0, len(fileSystems.List))
	for _, fs := range fileSystems.List {
		var usage gosigar.FileSystemUsage
		if err := usage.Get(fs.DirName); err != nil {
			// we try to output something
			rows = append(rows, []interface{}{fs.DevName, 0, 0, 0, 0, fs.DirName})
			continue
		}
		rows = append(rows, []interface{}{fs.DevName, usage.Total, usage.Used, usage.Avail, usage.UsePercent(), fs.DirName})
	}
	return data.NewMemoryFrame("disk_usage", columns, rows), nil
}
// getMemory reads memory information via ghw and returns the single-row
// "memory" frame: total physical bytes, total usable bytes and the supported
// page sizes.
func getMemory() (data.MemoryFrame, error) {
	memInfo, err := ghw.Memory()
	if err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to read memory")
	}

	columns := []string{"totalPhysical", "totalUsable", "supportedPageSizes"}
	rows := [][]interface{}{
		{memInfo.TotalPhysicalBytes, memInfo.TotalUsableBytes, memInfo.SupportedPageSizes},
	}
	return data.NewMemoryFrame("memory", columns, rows), nil
}
// getMemoryUsage reads memory and swap counters via gosigar and returns the
// "memory_usage" frame with three rows: "mem", "buffers/cache" and "swap".
// The "buffers/cache" row reports the ActualUsed/ActualFree figures and a
// literal 0 as its total.
func getMemoryUsage() (data.MemoryFrame, error) {
	var mem gosigar.Mem
	if err := mem.Get(); err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to read memory usage")
	}

	var swap gosigar.Swap
	if err := swap.Get(); err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to read swap")
	}

	columns := []string{"type", "total", "used", "free"}
	rows := [][]interface{}{
		{"mem", mem.Total, mem.Used, mem.Free},
		{"buffers/cache", 0, mem.ActualUsed, mem.ActualFree},
		{"swap", swap.Total, swap.Used, swap.Free},
	}
	return data.NewMemoryFrame("memory_usage", columns, rows), nil
}
// getCPU lists processors via ghw and returns the "cpu" frame with one row per
// (processor, core) pair. The processor's capabilities are flattened into a
// single space-separated string and repeated on each of its core rows.
func getCPU() (data.MemoryFrame, error) {
	cpuInfo, err := ghw.CPU()
	if err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to list cpus")
	}

	columns := []string{"processor", "vendor", "model", "core", "numThreads", "logical", "capabilities"}

	var rows [][]interface{}
	for _, p := range cpuInfo.Processors {
		for _, c := range p.Cores {
			row := []interface{}{
				p.ID, p.Vendor, p.Model,
				c.ID, c.NumThreads, c.LogicalProcessors,
				strings.Join(p.Capabilities, " "),
			}
			rows = append(rows, row)
		}
	}
	return data.NewMemoryFrame("cpu", columns, rows), nil
}
// getCPUUsage returns CPU usage counters (as opposed to the listing of
// architecture details produced by getCPU) as the "cpu_usage" frame, one row
// per entry in gosigar's CPU list. The values are ticks, so meaningful usage
// needs successive samples; the function is therefore not currently used.
// See https://github.com/elastic/beats/blob/master/metricbeat/internal/metrics/cpu/metrics.go#L131 for inspiration.
//nolint
func getCPUUsage() (data.MemoryFrame, error) {
	var cpus gosigar.CpuList
	if err := cpus.Get(); err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to list cpus for usage")
	}

	columns := []string{"sys", "nice", "stolen", "irq", "idle", "softIrq", "user", "wait", "total"}
	rows := make([][]interface{}, len(cpus.List))
	for idx := range cpus.List {
		c := cpus.List[idx]
		rows[idx] = []interface{}{c.Sys, c.Nice, c.Stolen, c.Irq, c.Idle, c.SoftIrq, c.User, c.Wait, c.Total()}
	}
	return data.NewMemoryFrame("cpu_usage", columns, rows), nil
}
// getProcessList lists process ids via gosigar and returns the "process_list"
// frame with one row per process whose state, memory, time and argument
// details could all be read.
//
// Failing to list the processes is an error. Failing to read any detail of a
// single process is not: that process is skipped (best effort) and contributes
// no row to the frame.
func getProcessList() (data.MemoryFrame, error) {
	pidList := gosigar.ProcList{}
	if err := pidList.Get(); err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to list processes")
	}

	columns := []string{"pid", "ppid", "stime", "time", "rss", "size", "faults", "minorFaults", "majorFaults", "user", "state", "priority", "nice", "command"}

	// Start empty and append: filling a pre-sized slice by index would leave a
	// nil row behind for every process skipped below.
	rows := make([][]interface{}, 0, len(pidList.List))
	for _, pid := range pidList.List {
		var (
			state    gosigar.ProcState
			mem      gosigar.ProcMem
			procTime gosigar.ProcTime
			args     gosigar.ProcArgs
		)
		if err := state.Get(pid); err != nil {
			continue
		}
		if err := mem.Get(pid); err != nil {
			continue
		}
		if err := procTime.Get(pid); err != nil {
			continue
		}
		if err := args.Get(pid); err != nil {
			continue
		}
		rows = append(rows, []interface{}{
			pid, state.Ppid, procTime.FormatStartTime(), procTime.FormatTotal(), mem.Resident, mem.Size,
			mem.PageFaults, mem.MinorFaults, mem.MajorFaults, state.Username, state.State, state.Priority, state.Nice,
			strings.Join(args.List, " "),
		})
	}
	return data.NewMemoryFrame("process_list", columns, rows), nil
}
// getHostDetails reads the host summary via goInfo and returns the single-row
// "os" frame: hostname, OS, Go OS, CPU count, core, kernel and platform.
func getHostDetails() (data.MemoryFrame, error) {
	info, err := goInfo.GetInfo()
	if err != nil {
		return data.MemoryFrame{}, errors.Wrapf(err, "unable to get host summary")
	}

	columns := []string{"hostname", "os", "goOs", "cpus", "core", "kernel", "platform"}
	row := []interface{}{info.Hostname, info.OS, info.GoOS, info.CPUs, info.Core, info.Kernel, info.Platform}
	return data.NewMemoryFrame("os", columns, [][]interface{}{row}), nil
}
// Description returns a one-line, human-readable summary of what this
// collector gathers.
func (sc *SystemCollector) Description() string {
return "Collects summary OS and hardware statistics for the host"
}
// init registers this collector under the name "system". The registered
// factory builds a fresh SystemCollector on each call, fetching the resource
// manager from the platform package at that time.
func init() {
	factory := func() (collectors.Collector, error) {
		sc := &SystemCollector{resourceManager: platform.GetResourceManager()}
		return sc, nil
	}
	collectors.Register("system", factory)
}