migrate moncheck to slog

With this moncheck itself is also changed to use slog.
The early config parsing is still using log as we do not have any idea
what else to use.
But from then on slog is used at all points.

With the additional config options a man page was also added to explain
the new config options.
This commit is contained in:
Gibheer 2023-09-05 15:07:38 +02:00
parent 8e6e01f47c
commit 26a4ca6ab5
4 changed files with 179 additions and 10 deletions

View File

@ -10,6 +10,7 @@ exec_prefix ?= ${prefix}
bindir ?= ${exec_prefix}/bin
sysconfdir ?= ${prefix}/etc/${NAME}
datarootdir ?= ${prefix}/share
man1dir ?= ${datarootdir}/man/man1
datadir ?= ${datarootdir}/${NAME}
WRKDIR ?= build
GOBIN ?= go
@ -51,6 +52,7 @@ preinstall:
# Install the moncheck binary into bindir, the example configuration into
# sysconfdir and the man page into man1dir (all below DESTDIR).
install-moncheck: preinstall
install -m 0755 ${WRKDIR}/moncheck ${DESTDIR}${bindir}
install -m 0644 moncheck.conf.example ${DESTDIR}${sysconfdir}
install -m 0644 man/moncheck.1 ${DESTDIR}${man1dir}
install-monwork: preinstall
install -m 0755 ${WRKDIR}/monwork ${DESTDIR}${bindir}

View File

@ -7,8 +7,10 @@ import (
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"log/slog"
"os"
"strconv"
"strings"
@ -30,6 +32,12 @@ type (
Path []string `json:"path"`
Workers int `json:"workers"`
CheckerID int `json:"checker_id"`
Log struct {
Format string `json:"format"`
Level string `json:"level"`
Output string `json:"output"`
} `json:"log"`
}
States []int
@ -47,27 +55,34 @@ func main() {
log.Fatalf("could not parse config: %s", err)
}
logger := parseLogger(config)
if err := os.Setenv("PATH", strings.Join(config.Path, ":")); err != nil {
log.Fatalf("could not set PATH: %s", err)
logger.Error("could not set PATH", "error", err, "configured path", config.Path)
os.Exit(1)
}
waitDuration, err := time.ParseDuration(config.Wait)
if err != nil {
log.Fatalf("could not parse wait duration: %s", err)
logger.Error("could not parse wait duration", "error", err, "wait duration", config.Wait)
os.Exit(1)
}
timeout, err := time.ParseDuration(config.Timeout)
if err != nil {
log.Fatalf("could not parse timeout: %s", err)
logger.Error("could not parse timeout", "error", err, "timeout", config.Timeout)
os.Exit(1)
}
db, err := sql.Open("postgres", config.DB)
if err != nil {
log.Fatalf("could not open database connection: %s", err)
logger.Error("could not open database connection", "error", err)
os.Exit(1)
}
hostname, err := os.Hostname()
if err != nil {
log.Fatalf("could not resolve hostname: %s", err)
logger.Error("could not resolve hostname", "error", err)
os.Exit(1)
}
checker, err := monzero.NewChecker(monzero.CheckerConfig{
@ -78,22 +93,23 @@ func main() {
Executor: monzero.CheckExec,
})
if err != nil {
log.Fatalf("could not create checker instance: %s", err)
logger.Error("could not create checker instance", "error", err)
os.Exit(1)
}
for i := 0; i < config.Workers; i++ {
go check(checker, waitDuration)
go check(checker, waitDuration, logger)
}
wg := sync.WaitGroup{}
wg.Add(1)
wg.Wait()
}
func check(checker *monzero.Checker, waitDuration time.Duration) {
func check(checker *monzero.Checker, waitDuration time.Duration, logger *slog.Logger) {
for {
if err := checker.Next(); err != nil {
if err != monzero.ErrNoCheck {
log.Printf("could not run check: %s", err)
logger.Info("check returned error", "error", err)
}
time.Sleep(waitDuration)
}
@ -171,3 +187,46 @@ func (s *States) ToOK() bool {
}
return false
}
// parseLogger builds the slog.Logger described by the log section of config.
//
// Output: "" or "stderr" writes to stderr, "stdout" writes to stdout, any other
// value is used as a file path that is opened for appending (and created with
// mode 0640 when it does not exist).
// Level: one of "debug", "info", "warn" or "error"; "" selects info.
// Format: "text" or "json"; "" selects text.
//
// A log file that cannot be opened, an unknown level or an unknown format ends
// the process through log.Fatalf.
func parseLogger(config Config) *slog.Logger {
	settings := config.Log

	// Resolve the destination first, so the file is opened before the other
	// settings are validated.
	var sink io.Writer
	if settings.Output == "" || settings.Output == "stderr" {
		sink = os.Stderr
	} else if settings.Output == "stdout" {
		sink = os.Stdout
	} else {
		file, err := os.OpenFile(settings.Output, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0640)
		if err != nil {
			log.Fatalf("could not open log file handler: %s", err)
		}
		sink = file
	}

	// The empty string is listed explicitly so that info is the default level.
	knownLevels := map[string]slog.Level{
		"debug": slog.LevelDebug,
		"":      slog.LevelInfo,
		"info":  slog.LevelInfo,
		"warn":  slog.LevelWarn,
		"error": slog.LevelError,
	}
	threshold, known := knownLevels[settings.Level]
	if !known {
		log.Fatalf("unknown log level '%s', only 'debug', 'info', 'warn' and 'error' are supported", settings.Level)
	}

	opts := &slog.HandlerOptions{Level: threshold}

	var handler slog.Handler
	switch settings.Format {
	case "json":
		handler = slog.NewJSONHandler(sink, opts)
	case "", "text":
		handler = slog.NewTextHandler(sink, opts)
	default:
		log.Fatalf("unknown log format '%s', only 'text' and 'json' are supported", settings.Format)
	}

	return slog.New(handler)
}

103
man/moncheck.1 Normal file
View File

@ -0,0 +1,103 @@
.TH moncheck 1
.SH NAME
moncheck \- runs shell commands as checks
.SH SYNOPSIS
.B moncheck
[\fB\-\-config\fR \fIPATH\fR]
.SH DESCRIPTION
.B moncheck
runs commands as checks and reports the result back into the database.
.SH OPTIONS
.TP
.BR \-\-config =\fIPATH\fR
Use the specified config file.
.SH CONFIGURATION
The configuration file must be formatted in json.
Known keys and their effect are as follows:
.TP
.BR checker_id
The \fIchecker_id\fR is required and is used to look up which checks should be
run by the monzero instance.
It is okay to run multiple instances with the same \fIchecker_id\fR, as the scheduling
and locking is done in the database.
.TP
.BR db
Set the database connection parameters to the postgres database. When using a
separate user, the user must have \fBwrite\fR permissions on the tables \fIactive_checks\fR,
\fInotifications\fR.
\fBread\fR permissions are required on the tables \fIchecks_notify\fR, \fImapping_level\fR.
Options to use are \fIuser\fR, \fIdbname\fR, \fIhost\fR, \fIport\fR, \fIpassword\fR.
.TP
.BR log
By default the log output goes to stderr in a human readable text format.
It can be adjusted via \fIoutput\fR to write to a file or to one of \fIstdout\fR
or \fIstderr\fR.
Using \fIlevel\fR with either one of \fIdebug\fR, \fIinfo\fR, \fIwarn\fR, \fIerror\fR
it is possible to limit the output.
By adjusting the \fIformat\fR the output can be changed from \fItext\fR to \fIjson\fR
to get machine readable log output.
.BR example
.nf
.RS
{
"format": "text",
"level": "info",
"output": "stderr"
}
.RE
.fi
.TP
.BR path " - " \fRdefault: []
Set a number of lookup paths that can be used to look up check commands on the
filesystem.
.TP
.BR timeout " - " \fRdefault: 30s
The timeout decides the maximum time limit a command is allowed to run. When choosing
longer timeouts be aware that timeouts can lead to more waiting checks.
.TP
.BR wait " - " \fRdefault: 30s
The wait duration sets the time to wait between two checks and can be used to
lower database traffic or used CPU.
.TP
.BR workers " - " \fRdefault: 25
Set the number of workers that run check commands in parallel. The more parallel
workers there are, the higher the lock contention on the database will become,
but at the same time long running checks will have less of an impact on the
number of waiting checks.
Tune this value according to your available resources, foremost CPU cores.
.SH CHECK COMMAND
A \fIcheck command\fR has to implement the nagios API of a check command.
1. It must return a message on stdout
2. It must have an exit code to show the severity level
.RS
0 - check was a success
1 - the check ended in an error
2 - the check ended in a warning
3 - the check is in an unknown state
.RE
If a check takes longer it can be caught by the timeout. Care should be taken,
though, that checks don't take too much time as the check interval only starts
after the check ended, which can lead to fewer checks done in a time period than
expected.

View File

@ -9,5 +9,10 @@
"/usr/bin",
"/usr/sbin"
],
"workers": 25
"workers": 25,
"log": {
"format": "text",
"level": "info",
"output": "stderr"
}
}