From 26a4ca6ab56d23054f606bc378ba355cebce485a Mon Sep 17 00:00:00 2001 From: Gibheer Date: Tue, 5 Sep 2023 15:07:38 +0200 Subject: migrate moncheck to slog With this moncheck itself is also changed to use slog. The early config parsing is still using log as we do not have any idea what else to use. But from then slog is used at all points. With the additional config options a man page was also added to explain the new config options. --- Makefile | 2 + cmd/moncheck/main.go | 77 ++++++++++++++++++++++++++++++++----- man/moncheck.1 | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++ moncheck.conf.example | 7 +++- 4 files changed, 179 insertions(+), 10 deletions(-) create mode 100644 man/moncheck.1 diff --git a/Makefile b/Makefile index 3f43128..a9e9418 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ exec_prefix ?= ${prefix} bindir ?= ${exec_prefix}/bin sysconfdir ?= ${prefix}/etc/${NAME} datarootdir ?= ${prefix}/share +man1dir ?= ${datarootdir}/man/man1 datadir ?= ${datarootdir}/${NAME} WRKDIR ?= build GOBIN ?= go @@ -51,6 +52,7 @@ preinstall: install-moncheck: preinstall install -m 0755 ${WRKDIR}/moncheck ${DESTDIR}${bindir} install -m 0644 moncheck.conf.example ${DESTDIR}${sysconfdir} + install -m 0644 man/moncheck.1 ${DESTDIR}${man1dir} install-monwork: preinstall install -m 0755 ${WRKDIR}/monwork ${DESTDIR}${bindir} diff --git a/cmd/moncheck/main.go b/cmd/moncheck/main.go index 73eee1a..12695d4 100644 --- a/cmd/moncheck/main.go +++ b/cmd/moncheck/main.go @@ -7,8 +7,10 @@ import ( "encoding/json" "flag" "fmt" + "io" "io/ioutil" "log" + "log/slog" "os" "strconv" "strings" @@ -30,6 +32,12 @@ type ( Path []string `json:"path"` Workers int `json:"workers"` CheckerID int `json:"checker_id"` + + Log struct { + Format string `json:"format"` + Level string `json:"level"` + Output string `json:"output"` + } `json:"log"` } States []int @@ -47,27 +55,34 @@ func main() { log.Fatalf("could not parse config: %s", err) } + logger := parseLogger(config) + if err := 
os.Setenv("PATH", strings.Join(config.Path, ":")); err != nil { - log.Fatalf("could not set PATH: %s", err) + logger.Error("could not set PATH", "error", err, "configured path", config.Path) + os.Exit(1) } waitDuration, err := time.ParseDuration(config.Wait) if err != nil { - log.Fatalf("could not parse wait duration: %s", err) + logger.Error("could not parse wait duration", "error", err, "wait duration", config.Wait) + os.Exit(1) } timeout, err := time.ParseDuration(config.Timeout) if err != nil { - log.Fatalf("could not parse timeout: %s", err) + logger.Error("could not parse timeout", "error", err, "timeout", config.Timeout) + os.Exit(1) } db, err := sql.Open("postgres", config.DB) if err != nil { - log.Fatalf("could not open database connection: %s", err) + logger.Error("could not open database connection", "error", err) + os.Exit(1) } hostname, err := os.Hostname() if err != nil { - log.Fatalf("could not resolve hostname: %s", err) + logger.Error("could not resolve hostname", "error", err) + os.Exit(1) } checker, err := monzero.NewChecker(monzero.CheckerConfig{ @@ -78,22 +93,23 @@ func main() { Executor: monzero.CheckExec, }) if err != nil { - log.Fatalf("could not create checker instance: %s", err) + logger.Error("could not create checker instance", "error", err) + os.Exit(1) } for i := 0; i < config.Workers; i++ { - go check(checker, waitDuration) + go check(checker, waitDuration, logger) } wg := sync.WaitGroup{} wg.Add(1) wg.Wait() } -func check(checker *monzero.Checker, waitDuration time.Duration) { +func check(checker *monzero.Checker, waitDuration time.Duration, logger *slog.Logger) { for { if err := checker.Next(); err != nil { if err != monzero.ErrNoCheck { - log.Printf("could not run check: %s", err) + logger.Info("check returned error", "error", err) } time.Sleep(waitDuration) } @@ -171,3 +187,46 @@ func (s *States) ToOK() bool { } return false } + +// parse the log settings and generate the slog output +func parseLogger(config Config) *slog.Logger { 
+ var output io.Writer + switch config.Log.Output { + case "", "stderr": + output = os.Stderr + case "stdout": + output = os.Stdout + default: + var err error + output, err = os.OpenFile(config.Log.Output, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0640) + if err != nil { + log.Fatalf("could not open log file handler: %s", err) + } + } + + var level slog.Level + switch config.Log.Level { + case "debug": + level = slog.LevelDebug + case "", "info": + level = slog.LevelInfo + case "warn": + level = slog.LevelWarn + case "error": + level = slog.LevelError + default: + log.Fatalf("unknown log level '%s', only 'debug', 'info', 'warn' and 'error' are supported", config.Log.Level) + } + + var handler slog.Handler + switch config.Log.Format { + case "", "text": + handler = slog.NewTextHandler(output, &slog.HandlerOptions{Level: level}) + case "json": + handler = slog.NewJSONHandler(output, &slog.HandlerOptions{Level: level}) + default: + log.Fatalf("unknown log format '%s', only 'text' and 'json' are supported", config.Log.Format) + } + + return slog.New(handler) +} diff --git a/man/moncheck.1 b/man/moncheck.1 new file mode 100644 index 0000000..ede4990 --- /dev/null +++ b/man/moncheck.1 @@ -0,0 +1,103 @@ +.TH moncheck 1 +.SH NAME +moncheck \- runs shell commands as checks +.SH SYNOPSIS +.B moncheck +[\fB\-\-config\fR \fIPATH\fR] +.SH DESCRIPTION +.B moncheck +runs commands as checks and reports the result back into the database. +.SH OPTIONS +.TP +.BR \-\-config =\fIPATH\fR +Use the specified config file. +.SH CONFIGURATION +The configuration file must be formatted in json. +Known keys and their effect are as follows: +.TP +.BR checker_id +The \fIchecker_id\fR is required and is used to look up which checks should be +run by the monzero instance. +It is okay to run multiple instances with the same \fIchecker_id\fR, as the scheduling +and locking is done in the database. + +.TP +.BR db +Set the database connection parameters to the postgres database. 
When using a +separate user, the user must have \fBwrite\fR permissions on the tables \fIactive_checks\fR, +\fInotifications\fR. + +\fBread\fR permissions are required on the tables \fIchecks_notify\fR, \fImapping_level\fR. + +Options to use are \fIuser\fR, \fIdbname\fR, \fIhost\fR, \fIport\fR, \fIpassword\fR. + +.TP +.BR log +The log output per default is going to stderr in a human readable way. +But it can be adjusted via \fIoutput\fR to write to a file or one of \fIstdout\fR +or \fIstderr\fR. + +Using \fIlevel\fR with either one of \fIdebug\fR, \fIinfo\fR, \fIwarn\fR, \fIerror\fR +it is possible to limit the output. + +By adjusting the \fIformat\fR the output can be changed from \fItext\fR to \fIjson\fR +to get machine readable log output. + +.BR example + +.nf +.RS +{ + "format": "text", + "level": "info", + "output": "stderr" +} +.RE +.fi + +.TP +.BR path " - " \fRdefault: [] +Set a number of lookup paths that can be used to look up check commands on the +filesystem. + +.TP +.BR timeout " - " \fRdefault: 30s +The timeout decides the maximum time limit a command is allowed to run. When choosing +longer timeouts be aware that timeouts can lead to more waiting checks. + +.TP +.BR wait " - " \fRdefault: 30s +The wait duration sets the time to wait between two checks and can be used to +lower database traffic or used CPU. + +.TP +.BR workers " - " \fRdefault: 25 +Set the number of workers that run check commands in parallel. The more parallel +workers there are, the higher the lock contention on the database will become, +but at the same time long running checks will have less of an impact on the +number of waiting checks. + +Tune this value according to your available resources, foremost CPU cores. + +.SH CHECK COMMAND + +A \fIcheck command\fR has to implement the nagios API of a check command. + +1. It must return a message on stdout + +2.
It must have an exit code to show the severity level +.RS +0 - check was a success + +1 - the check ended in an error + +2 - the check ended in a warning + +3 - the check is in an unknown state +.RE + +If a check takes longer it can be caught by the timeout. Care should be taken, +though, that checks don't take too much time as the check interval only starts +after the check ended, which can lead to fewer checks done in a time period than +expected. diff --git a/moncheck.conf.example b/moncheck.conf.example index 29f5d81..4694ef0 100644 --- a/moncheck.conf.example +++ b/moncheck.conf.example @@ -9,5 +9,10 @@ "/usr/bin", "/usr/sbin" ], - "workers": 25 + "workers": 25, + "log": { + "format": "text", + "level": "info", + "output": "stderr" + } } -- cgit v1.2.3-70-g09d2