2022-04-27 12:22:20 +00:00
|
|
|
package system
|
|
|
|
|
|
|
|
import (
|
2022-06-14 10:57:04 +00:00
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors"
|
|
|
|
"github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform"
|
|
|
|
"github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config"
|
|
|
|
"github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data"
|
2022-04-27 12:22:20 +00:00
|
|
|
"github.com/elastic/gosigar"
|
|
|
|
"github.com/jaypipes/ghw"
|
|
|
|
"github.com/matishsiao/goInfo"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
)
|
|
|
|
|
|
|
|
// This collector collects the system overview
|
|
|
|
|
|
|
|
type SystemCollector struct {
|
|
|
|
resourceManager *platform.ResourceManager
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewSystemCollector(m *platform.ResourceManager) *SystemCollector {
|
|
|
|
return &SystemCollector{
|
|
|
|
resourceManager: m,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sc *SystemCollector) Collect(conf config.Configuration) (*data.DiagnosticBundle, error) {
|
2022-06-30 16:42:55 +00:00
|
|
|
_, err := conf.ValidateConfig(sc.Configuration())
|
2022-04-27 12:22:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return &data.DiagnosticBundle{}, err
|
|
|
|
}
|
|
|
|
frames := make(map[string]data.Frame)
|
|
|
|
var frameErrors []error
|
|
|
|
|
|
|
|
frameErrors = addStatsToFrame(frames, frameErrors, "disks", getDisk)
|
|
|
|
frameErrors = addStatsToFrame(frames, frameErrors, "disk_usage", getDiskUsage)
|
|
|
|
|
|
|
|
frameErrors = addStatsToFrame(frames, frameErrors, "memory", getMemory)
|
|
|
|
frameErrors = addStatsToFrame(frames, frameErrors, "memory_usage", getMemoryUsage)
|
|
|
|
|
|
|
|
frameErrors = addStatsToFrame(frames, frameErrors, "cpu", getCPU)
|
|
|
|
//frameErrors = addStatsToFrame(frames, frameErrors, "cpu_usage", getCPUUsage)
|
|
|
|
|
|
|
|
frameErrors = addStatsToFrame(frames, frameErrors, "processes", getProcessList)
|
|
|
|
|
|
|
|
frameErrors = addStatsToFrame(frames, frameErrors, "os", getHostDetails)
|
|
|
|
|
|
|
|
return &data.DiagnosticBundle{
|
|
|
|
Frames: frames,
|
|
|
|
Errors: data.FrameErrors{
|
|
|
|
Errors: frameErrors,
|
|
|
|
},
|
|
|
|
}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func addStatsToFrame(frames map[string]data.Frame, errors []error, name string, statFunc func() (data.MemoryFrame, error)) []error {
|
|
|
|
frame, err := statFunc()
|
|
|
|
if err != nil {
|
|
|
|
errors = append(errors, err)
|
|
|
|
}
|
|
|
|
frames[name] = frame
|
|
|
|
return errors
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sc *SystemCollector) Configuration() config.Configuration {
|
|
|
|
return config.Configuration{
|
|
|
|
Params: []config.ConfigParam{},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sc *SystemCollector) IsDefault() bool {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
func getDisk() (data.MemoryFrame, error) {
|
|
|
|
block, err := ghw.Block()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to list block storage")
|
|
|
|
}
|
|
|
|
var rows [][]interface{}
|
|
|
|
columns := []string{"name", "size", "physicalBlockSize", "driveType", "controller", "vendor", "model", "partitionName", "partitionSize", "mountPoint", "readOnly"}
|
|
|
|
for _, disk := range block.Disks {
|
|
|
|
for _, part := range disk.Partitions {
|
|
|
|
rows = append(rows, []interface{}{disk.Name, disk.SizeBytes, disk.PhysicalBlockSizeBytes, disk.DriveType, disk.StorageController, disk.Vendor, disk.Model, part.Name, part.SizeBytes, part.MountPoint, part.IsReadOnly})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return data.NewMemoryFrame("disk_usage", columns, rows), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getDiskUsage() (data.MemoryFrame, error) {
|
|
|
|
fsList := gosigar.FileSystemList{}
|
|
|
|
err := fsList.Get()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to list filesystems for usage")
|
|
|
|
}
|
|
|
|
rows := make([][]interface{}, len(fsList.List))
|
|
|
|
columns := []string{"filesystem", "size", "used", "avail", "use%", "mounted on"}
|
|
|
|
for i, fs := range fsList.List {
|
|
|
|
dirName := fs.DirName
|
|
|
|
usage := gosigar.FileSystemUsage{}
|
|
|
|
err = usage.Get(dirName)
|
|
|
|
if err == nil {
|
|
|
|
rows[i] = []interface{}{fs.DevName, usage.Total, usage.Used, usage.Avail, usage.UsePercent(), dirName}
|
|
|
|
} else {
|
|
|
|
// we try to output something
|
|
|
|
rows[i] = []interface{}{fs.DevName, 0, 0, 0, 0, dirName}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return data.NewMemoryFrame("disk_usage", columns, rows), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getMemory() (data.MemoryFrame, error) {
|
|
|
|
memory, err := ghw.Memory()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to read memory")
|
|
|
|
}
|
|
|
|
columns := []string{"totalPhysical", "totalUsable", "supportedPageSizes"}
|
|
|
|
rows := make([][]interface{}, 1)
|
|
|
|
rows[0] = []interface{}{memory.TotalPhysicalBytes, memory.TotalUsableBytes, memory.SupportedPageSizes}
|
|
|
|
return data.NewMemoryFrame("memory", columns, rows), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getMemoryUsage() (data.MemoryFrame, error) {
|
|
|
|
mem := gosigar.Mem{}
|
|
|
|
swap := gosigar.Swap{}
|
|
|
|
|
|
|
|
err := mem.Get()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to read memory usage")
|
|
|
|
}
|
|
|
|
|
|
|
|
err = swap.Get()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to read swap")
|
|
|
|
}
|
|
|
|
|
|
|
|
columns := []string{"type", "total", "used", "free"}
|
|
|
|
rows := make([][]interface{}, 3)
|
|
|
|
|
|
|
|
rows[0] = []interface{}{"mem", mem.Total, mem.Used, mem.Free}
|
|
|
|
rows[1] = []interface{}{"buffers/cache", 0, mem.ActualUsed, mem.ActualFree}
|
|
|
|
rows[2] = []interface{}{"swap", swap.Total, swap.Used, swap.Free}
|
|
|
|
return data.NewMemoryFrame("memory_usage", columns, rows), nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
func getCPU() (data.MemoryFrame, error) {
|
|
|
|
cpu, err := ghw.CPU()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to list cpus")
|
|
|
|
}
|
|
|
|
columns := []string{"processor", "vendor", "model", "core", "numThreads", "logical", "capabilities"}
|
|
|
|
var rows [][]interface{}
|
|
|
|
for _, proc := range cpu.Processors {
|
|
|
|
for _, core := range proc.Cores {
|
|
|
|
rows = append(rows, []interface{}{proc.ID, proc.Vendor, proc.Model, core.ID, core.NumThreads, core.LogicalProcessors, strings.Join(proc.Capabilities, " ")})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return data.NewMemoryFrame("cpu", columns, rows), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// this gets cpu usage vs a listing of arch etc - see getCPU(). This needs successive values as its ticks - not currently used
|
|
|
|
// see https://github.com/elastic/beats/blob/master/metricbeat/internal/metrics/cpu/metrics.go#L131 for inspiration
|
|
|
|
//nolint
|
|
|
|
func getCPUUsage() (data.MemoryFrame, error) {
|
|
|
|
cpuList := gosigar.CpuList{}
|
|
|
|
err := cpuList.Get()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to list cpus for usage")
|
|
|
|
}
|
|
|
|
columns := []string{"sys", "nice", "stolen", "irq", "idle", "softIrq", "user", "wait", "total"}
|
|
|
|
rows := make([][]interface{}, len(cpuList.List), len(cpuList.List))
|
|
|
|
for i, cpu := range cpuList.List {
|
|
|
|
rows[i] = []interface{}{cpu.Sys, cpu.Nice, cpu.Stolen, cpu.Irq, cpu.Idle, cpu.SoftIrq, cpu.User, cpu.Wait, cpu.Total()}
|
|
|
|
}
|
|
|
|
return data.NewMemoryFrame("cpu_usage", columns, rows), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getProcessList() (data.MemoryFrame, error) {
|
|
|
|
pidList := gosigar.ProcList{}
|
|
|
|
err := pidList.Get()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to list processes")
|
|
|
|
}
|
|
|
|
columns := []string{"pid", "ppid", "stime", "time", "rss", "size", "faults", "minorFaults", "majorFaults", "user", "state", "priority", "nice", "command"}
|
|
|
|
rows := make([][]interface{}, len(pidList.List))
|
|
|
|
for i, pid := range pidList.List {
|
|
|
|
state := gosigar.ProcState{}
|
|
|
|
mem := gosigar.ProcMem{}
|
|
|
|
time := gosigar.ProcTime{}
|
|
|
|
args := gosigar.ProcArgs{}
|
|
|
|
if err := state.Get(pid); err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := mem.Get(pid); err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := time.Get(pid); err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := args.Get(pid); err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
rows[i] = []interface{}{pid, state.Ppid, time.FormatStartTime(), time.FormatTotal(), mem.Resident, mem.Size,
|
|
|
|
mem.PageFaults, mem.MinorFaults, mem.MajorFaults, state.Username, state.State, state.Priority, state.Nice,
|
|
|
|
strings.Join(args.List, " ")}
|
|
|
|
}
|
|
|
|
return data.NewMemoryFrame("process_list", columns, rows), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getHostDetails() (data.MemoryFrame, error) {
|
|
|
|
gi, err := goInfo.GetInfo()
|
|
|
|
if err != nil {
|
|
|
|
return data.MemoryFrame{}, errors.Wrapf(err, "unable to get host summary")
|
|
|
|
}
|
|
|
|
columns := []string{"hostname", "os", "goOs", "cpus", "core", "kernel", "platform"}
|
|
|
|
rows := [][]interface{}{
|
|
|
|
{gi.Hostname, gi.OS, gi.GoOS, gi.CPUs, gi.Core, gi.Kernel, gi.Platform},
|
|
|
|
}
|
|
|
|
return data.NewMemoryFrame("os", columns, rows), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sc *SystemCollector) Description() string {
|
|
|
|
return "Collects summary OS and hardware statistics for the host"
|
|
|
|
}
|
|
|
|
|
|
|
|
// here we register the collector for use
|
|
|
|
func init() {
|
|
|
|
collectors.Register("system", func() (collectors.Collector, error) {
|
|
|
|
return &SystemCollector{
|
|
|
|
resourceManager: platform.GetResourceManager(),
|
|
|
|
}, nil
|
|
|
|
})
|
|
|
|
}
|