aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGibheer <gibheer+git@zero-knowledge.org>2023-09-05 15:07:38 +0200
committerGibheer <gibheer+git@zero-knowledge.org>2023-09-05 15:07:38 +0200
commit26a4ca6ab56d23054f606bc378ba355cebce485a (patch)
treebc5568b6e520eb262a61374fd1da83c12baff208
parent8e6e01f47c320f862b9ba44815587006ba80cf77 (diff)
migrate moncheck to slog
With this change, moncheck itself also uses slog. The early config parsing still uses log, as there is nothing better available before the log settings have been read. From then on, slog is used everywhere. A man page was also added to explain the new config options.
-rw-r--r--Makefile2
-rw-r--r--cmd/moncheck/main.go77
-rw-r--r--man/moncheck.1103
-rw-r--r--moncheck.conf.example7
4 files changed, 179 insertions, 10 deletions
diff --git a/Makefile b/Makefile
index 3f43128..a9e9418 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,7 @@ exec_prefix ?= ${prefix}
bindir ?= ${exec_prefix}/bin
sysconfdir ?= ${prefix}/etc/${NAME}
datarootdir ?= ${prefix}/share
+man1dir ?= ${datarootdir}/man/man1
datadir ?= ${datarootdir}/${NAME}
WRKDIR ?= build
GOBIN ?= go
@@ -51,6 +52,7 @@ preinstall:
install-moncheck: preinstall
install -m 0755 ${WRKDIR}/moncheck ${DESTDIR}${bindir}
install -m 0644 moncheck.conf.example ${DESTDIR}${sysconfdir}
+ install -m 0644 man/moncheck.1 ${DESTDIR}${man1dir}
install-monwork: preinstall
install -m 0755 ${WRKDIR}/monwork ${DESTDIR}${bindir}
diff --git a/cmd/moncheck/main.go b/cmd/moncheck/main.go
index 73eee1a..12695d4 100644
--- a/cmd/moncheck/main.go
+++ b/cmd/moncheck/main.go
@@ -7,8 +7,10 @@ import (
"encoding/json"
"flag"
"fmt"
+ "io"
"io/ioutil"
"log"
+ "log/slog"
"os"
"strconv"
"strings"
@@ -30,6 +32,12 @@ type (
Path []string `json:"path"`
Workers int `json:"workers"`
CheckerID int `json:"checker_id"`
+
+ Log struct {
+ Format string `json:"format"`
+ Level string `json:"level"`
+ Output string `json:"output"`
+ } `json:"log"`
}
States []int
@@ -47,27 +55,34 @@ func main() {
log.Fatalf("could not parse config: %s", err)
}
+ logger := parseLogger(config)
+
if err := os.Setenv("PATH", strings.Join(config.Path, ":")); err != nil {
- log.Fatalf("could not set PATH: %s", err)
+ logger.Error("could not set PATH", "error", err, "configured path", config.Path)
+ os.Exit(1)
}
waitDuration, err := time.ParseDuration(config.Wait)
if err != nil {
- log.Fatalf("could not parse wait duration: %s", err)
+ logger.Error("could not parse wait duration", "error", err, "wait duration", config.Wait)
+ os.Exit(1)
}
timeout, err := time.ParseDuration(config.Timeout)
if err != nil {
- log.Fatalf("could not parse timeout: %s", err)
+ logger.Error("could not parse timeout", "error", err, "timeout", config.Timeout)
+ os.Exit(1)
}
db, err := sql.Open("postgres", config.DB)
if err != nil {
- log.Fatalf("could not open database connection: %s", err)
+ logger.Error("could not open database connection", "error", err)
+ os.Exit(1)
}
hostname, err := os.Hostname()
if err != nil {
- log.Fatalf("could not resolve hostname: %s", err)
+ logger.Error("could not resolve hostname", "error", err)
+ os.Exit(1)
}
checker, err := monzero.NewChecker(monzero.CheckerConfig{
@@ -78,22 +93,23 @@ func main() {
Executor: monzero.CheckExec,
})
if err != nil {
- log.Fatalf("could not create checker instance: %s", err)
+ logger.Error("could not create checker instance", "error", err)
+ os.Exit(1)
}
for i := 0; i < config.Workers; i++ {
- go check(checker, waitDuration)
+ go check(checker, waitDuration, logger)
}
wg := sync.WaitGroup{}
wg.Add(1)
wg.Wait()
}
-func check(checker *monzero.Checker, waitDuration time.Duration) {
+func check(checker *monzero.Checker, waitDuration time.Duration, logger *slog.Logger) {
for {
if err := checker.Next(); err != nil {
if err != monzero.ErrNoCheck {
- log.Printf("could not run check: %s", err)
+ logger.Info("check returned error", "error", err)
}
time.Sleep(waitDuration)
}
@@ -171,3 +187,46 @@ func (s *States) ToOK() bool {
}
return false
}
+
+// parseLogger builds the slog logger from config.Log; invalid settings abort via log.Fatalf.
+func parseLogger(config Config) *slog.Logger {
+ var output io.Writer
+ switch config.Log.Output {
+ case "", "stderr":
+ output = os.Stderr
+ case "stdout":
+ output = os.Stdout
+ default:
+ var err error
+ output, err = os.OpenFile(config.Log.Output, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0640)
+ if err != nil {
+ log.Fatalf("could not open log file handler: %s", err)
+ }
+ }
+
+ var level slog.Level
+ switch config.Log.Level {
+ case "debug":
+ level = slog.LevelDebug
+ case "", "info":
+ level = slog.LevelInfo
+ case "warn":
+ level = slog.LevelWarn
+ case "error":
+ level = slog.LevelError
+ default:
+ log.Fatalf("unknown log level '%s', only 'debug', 'info', 'warn' and 'error' are supported", config.Log.Level)
+ }
+
+ var handler slog.Handler
+ switch config.Log.Format {
+ case "", "text":
+ handler = slog.NewTextHandler(output, &slog.HandlerOptions{Level: level})
+ case "json":
+ handler = slog.NewJSONHandler(output, &slog.HandlerOptions{Level: level})
+ default:
+ log.Fatalf("unknown log format '%s', only 'text' and 'json' are supported", config.Log.Format)
+ }
+
+ return slog.New(handler)
+}
diff --git a/man/moncheck.1 b/man/moncheck.1
new file mode 100644
index 0000000..ede4990
--- /dev/null
+++ b/man/moncheck.1
@@ -0,0 +1,103 @@
+.TH moncheck 1
+.SH NAME
+moncheck \- runs shell commands as checks
+.SH SYNOPSIS
+.B moncheck
+[\fB\-\-config\fR \fIPATH\fR]
+.SH DESCRIPTION
+.B moncheck
+runs commands as checks and reports the result back into the database.
+.SH OPTIONS
+.TP
+.BR \-\-config =\fIPATH\fR
+Use the specified config file.
+.SH CONFIGURATION
+The configuration file must be formatted in json.
+Known keys and their effect are as follows:
+.TP
+.BR checker_id
+The \fIchecker_id\fR is required and is used to look up which checks should be
+run by the monzero instance.
+It is okay to run multiple instances with the same \fIchecker_id\fR, as the scheduling
+and locking are done in the database.
+
+.TP
+.BR db
+Set the database connection parameters to the postgres database. When using a
+separate user, the user must have \fBwrite\fR permissions on the tables \fIactive_checks\fR,
+\fInotifications\fR.
+
+\fBread\fR permissions are required on the tables \fIchecks_notify\fR, \fImapping_level\fR.
+
+Options to use are \fIuser\fR, \fIdbname\fR, \fIhost\fR, \fIport\fR, \fIpassword\fR.
+
+.TP
+.BR log
+By default, log output goes to stderr in a human-readable text format.
+The destination can be changed via \fIoutput\fR, which takes a file path or one of
+\fIstdout\fR or \fIstderr\fR.
+
+Setting \fIlevel\fR to one of \fIdebug\fR, \fIinfo\fR, \fIwarn\fR or \fIerror\fR
+limits the output to messages of that severity or higher.
+
+By adjusting the \fIformat\fR the output can be changed from \fItext\fR to \fIjson\fR
+to get machine readable log output.
+
+.BR example
+
+.nf
+.RS
+{
+ "format": "text",
+ "level": "info",
+ "output": "stderr"
+}
+.RE
+.fi
+
+.TP
+.BR path " - " \fRdefault: []
+Set a list of lookup paths that are used to look up check commands on the
+filesystem.
+
+.TP
+.BR timeout " - " \fRdefault: 30s
+The timeout sets the maximum time a command is allowed to run. When choosing
+longer timeouts, be aware that they can lead to more waiting checks.
+
+.TP
+.BR wait " - " \fRdefault: 30s
+The wait duration sets the time to wait between two checks and can be used to
+lower database traffic or CPU usage.
+
+.TP
+.BR workers " - " \fRdefault: 25
+Set the number of workers that run check commands in parallel. The more parallel
+workers there are, the higher the lock contention on the database will become,
+but at the same time long running checks will have less of an impact on the
+number of waiting checks.
+
+Tune this value according to your available resources, foremost CPU cores.
+
+.SH CHECK COMMAND
+
+A \fIcheck command\fR has to implement the Nagios API of a check command.
+
+1. It must return a message on stdout
+
+2. It must have an exit code to show the severity level
+
+.RS
+0 - check was a success
+
+1 - the check ended in an error
+
+2 - the check ended in a warning
+
+3 - the check is in an unknown state
+.RE
+
+If a check takes too long, it can be caught by the timeout. Care should be taken,
+though, that checks don't take too much time, as the check interval only starts
+after the check has ended, which can lead to fewer checks being run in a time
+period than expected.
diff --git a/moncheck.conf.example b/moncheck.conf.example
index 29f5d81..4694ef0 100644
--- a/moncheck.conf.example
+++ b/moncheck.conf.example
@@ -9,5 +9,10 @@
"/usr/bin",
"/usr/sbin"
],
- "workers": 25
+ "workers": 25,
+ "log": {
+ "format": "text",
+ "level": "info",
+ "output": "stderr"
+ }
}