diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | cmd/moncheck/main.go | 77 | ||||
-rw-r--r-- | man/moncheck.1 | 103 | ||||
-rw-r--r-- | moncheck.conf.example | 7 |
4 files changed, 179 insertions, 10 deletions
@@ -10,6 +10,7 @@ exec_prefix ?= ${prefix} bindir ?= ${exec_prefix}/bin sysconfdir ?= ${prefix}/etc/${NAME} datarootdir ?= ${prefix}/share +man1dir ?= ${datarootdir}/man/man1 datadir ?= ${datarootdir}/${NAME} WRKDIR ?= build GOBIN ?= go @@ -51,6 +52,7 @@ preinstall: install-moncheck: preinstall install -m 0755 ${WRKDIR}/moncheck ${DESTDIR}${bindir} install -m 0644 moncheck.conf.example ${DESTDIR}${sysconfdir} + install -m 0644 man/moncheck.1 ${DESTDIR}${man1dir} install-monwork: preinstall install -m 0755 ${WRKDIR}/monwork ${DESTDIR}${bindir} diff --git a/cmd/moncheck/main.go b/cmd/moncheck/main.go index 73eee1a..12695d4 100644 --- a/cmd/moncheck/main.go +++ b/cmd/moncheck/main.go @@ -7,8 +7,10 @@ import ( "encoding/json" "flag" "fmt" + "io" "io/ioutil" "log" + "log/slog" "os" "strconv" "strings" @@ -30,6 +32,12 @@ type ( Path []string `json:"path"` Workers int `json:"workers"` CheckerID int `json:"checker_id"` + + Log struct { + Format string `json:"format"` + Level string `json:"level"` + Output string `json:"output"` + } `json:"log"` } States []int @@ -47,27 +55,34 @@ func main() { log.Fatalf("could not parse config: %s", err) } + logger := parseLogger(config) + if err := os.Setenv("PATH", strings.Join(config.Path, ":")); err != nil { - log.Fatalf("could not set PATH: %s", err) + logger.Error("could not set PATH", "error", err, "configured path", config.Path) + os.Exit(1) } waitDuration, err := time.ParseDuration(config.Wait) if err != nil { - log.Fatalf("could not parse wait duration: %s", err) + logger.Error("could not parse wait duration", "error", err, "wait duration", config.Wait) + os.Exit(1) } timeout, err := time.ParseDuration(config.Timeout) if err != nil { - log.Fatalf("could not parse timeout: %s", err) + logger.Error("could not parse timeout", "error", err, "timeout", config.Timeout) + os.Exit(1) } db, err := sql.Open("postgres", config.DB) if err != nil { - log.Fatalf("could not open database connection: %s", err) + logger.Error("could not open 
database connection", "error", err) + os.Exit(1) } hostname, err := os.Hostname() if err != nil { - log.Fatalf("could not resolve hostname: %s", err) + logger.Error("could not resolve hostname", "error", err) + os.Exit(1) } checker, err := monzero.NewChecker(monzero.CheckerConfig{ @@ -78,22 +93,23 @@ func main() { Executor: monzero.CheckExec, }) if err != nil { - log.Fatalf("could not create checker instance: %s", err) + logger.Error("could not create checker instance", "error", err) + os.Exit(1) } for i := 0; i < config.Workers; i++ { - go check(checker, waitDuration) + go check(checker, waitDuration, logger) } wg := sync.WaitGroup{} wg.Add(1) wg.Wait() } -func check(checker *monzero.Checker, waitDuration time.Duration) { +func check(checker *monzero.Checker, waitDuration time.Duration, logger *slog.Logger) { for { if err := checker.Next(); err != nil { if err != monzero.ErrNoCheck { - log.Printf("could not run check: %s", err) + logger.Info("check returned error", "error", err) } time.Sleep(waitDuration) } @@ -171,3 +187,46 @@ func (s *States) ToOK() bool { } return false } + +// parse the log settings and generate the slog output +func parseLogger(config Config) *slog.Logger { + var output io.Writer + switch config.Log.Output { + case "", "stderr": + output = os.Stderr + case "stdout": + output = os.Stdout + default: + var err error + output, err = os.OpenFile(config.Log.Output, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0640) + if err != nil { + log.Fatalf("could not open log file handler: %s", err) + } + } + + var level slog.Level + switch config.Log.Level { + case "debug": + level = slog.LevelDebug + case "", "info": + level = slog.LevelInfo + case "warn": + level = slog.LevelWarn + case "error": + level = slog.LevelError + default: + log.Fatalf("unknown log level '%s', only 'debug', 'info', 'warn' and 'error' are supported", config.Log.Level) + } + + var handler slog.Handler + switch config.Log.Format { + case "", "text": + handler = slog.NewTextHandler(output, 
&slog.HandlerOptions{Level: level}) + case "json": + handler = slog.NewJSONHandler(output, &slog.HandlerOptions{Level: level}) + default: + log.Fatalf("unknown log format '%s', only 'text' and 'json' are supported", config.Log.Format) + } + + return slog.New(handler) +} diff --git a/man/moncheck.1 b/man/moncheck.1 new file mode 100644 index 0000000..ede4990 --- /dev/null +++ b/man/moncheck.1 @@ -0,0 +1,103 @@ +.TH moncheck 1 +.SH NAME +moncheck \- runs shell commands as checks +.SH SYNOPSIS +.B moncheck +[\fB\-\-config\fR \fIPATH\fR] +.SH DESCRIPTION +.B moncheck +runs commands as checks and reports the result back into the database. +.SH OPTIONS +.TP +.BR \-\-config =\fIPATH\fR +Use the specified config file. +.SH CONFIGURATION +The configuration file must be formatted as JSON. +Known keys and their effect are as follows: +.TP +.BR checker_id +The \fIchecker_id\fR is required and is used to look up which checks should be +run by the monzero instance. +It is okay to run multiple instances with the same \fIchecker_id\fR, as the scheduling +and locking are done in the database. + +.TP +.BR db +Set the database connection parameters to the postgres database. When using a +separate user, the user must have \fBwrite\fR permissions on the tables \fIactive_checks\fR, +\fInotifications\fR. + +\fBread\fR permissions are required on the tables \fIchecks_notify\fR, \fImapping_level\fR. + +Options to use are \fIuser\fR, \fIdbname\fR, \fIhost\fR, \fIport\fR, \fIpassword\fR. + +.TP +.BR log +By default the log output goes to stderr in a human readable format. +It can be adjusted via \fIoutput\fR to write to a file or to one of \fIstdout\fR +or \fIstderr\fR. + +Using \fIlevel\fR with one of \fIdebug\fR, \fIinfo\fR, \fIwarn\fR, \fIerror\fR +it is possible to limit the output. + +By adjusting the \fIformat\fR the output can be changed from \fItext\fR to \fIjson\fR +to get machine readable log output. 
+ +.BR example + +.nf +.RS +{ + "format": "text", + "level": "info", + "output": "stderr" +} +.RE +.fi + +.TP +.BR path " - " \fRdefault: [] +Set a number of lookup paths that can be used to look up check commands on the +filesystem. + +.TP +.BR timeout " - " \fRdefault: 30s +The timeout decides the maximum time limit a command is allowed to run. When choosing +longer timeouts be aware that timeouts can lead to more waiting checks. + +.TP +.BR wait " - " \fRdefault: 30s +The wait duration sets the time to wait between two checks and can be used to +lower database traffic or CPU usage. + +.TP +.BR workers " - " \fRdefault: 25 +Set the number of workers that run check commands in parallel. The more parallel +workers there are, the higher the lock contention on the database will become, +but at the same time long running checks will have less of an impact on the +number of waiting checks. + +Tune this value according to your available resources, foremost CPU cores. + +.SH CHECK COMMAND + +A \fIcheck command\fR has to implement the Nagios API of a check command. + +1. It must return a message on stdout + +2. It must have an exit code to show the severity level + +.RS +0 - check was a success + +1 - the check ended in an error + +2 - the check ended in a warning + +3 - the check is in an unknown state +.RE + +If a check takes longer it can be caught by the timeout. Care should be taken, +though, that checks don't take too much time as the check interval only starts +after the check ended, which can lead to fewer checks done in a time period than +expected. diff --git a/moncheck.conf.example b/moncheck.conf.example index 29f5d81..4694ef0 100644 --- a/moncheck.conf.example +++ b/moncheck.conf.example @@ -9,5 +9,10 @@ "/usr/bin", "/usr/sbin" ], - "workers": 25 + "workers": 25, + "log": { + "format": "text", + "level": "info", + "output": "stderr" + } } |