aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--cmd/moncheck/main.go77
-rw-r--r--man/moncheck.1103
-rw-r--r--moncheck.conf.example7
4 files changed, 179 insertions, 10 deletions
diff --git a/Makefile b/Makefile
index 3f43128..a9e9418 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,7 @@ exec_prefix ?= ${prefix}
bindir ?= ${exec_prefix}/bin
sysconfdir ?= ${prefix}/etc/${NAME}
datarootdir ?= ${prefix}/share
+man1dir ?= ${datarootdir}/man/man1
datadir ?= ${datarootdir}/${NAME}
WRKDIR ?= build
GOBIN ?= go
@@ -51,6 +52,7 @@ preinstall:
install-moncheck: preinstall
install -m 0755 ${WRKDIR}/moncheck ${DESTDIR}${bindir}
install -m 0644 moncheck.conf.example ${DESTDIR}${sysconfdir}
+ install -m 0644 man/moncheck.1 ${DESTDIR}${man1dir}
install-monwork: preinstall
install -m 0755 ${WRKDIR}/monwork ${DESTDIR}${bindir}
diff --git a/cmd/moncheck/main.go b/cmd/moncheck/main.go
index 73eee1a..12695d4 100644
--- a/cmd/moncheck/main.go
+++ b/cmd/moncheck/main.go
@@ -7,8 +7,10 @@ import (
"encoding/json"
"flag"
"fmt"
+ "io"
"io/ioutil"
"log"
+ "log/slog"
"os"
"strconv"
"strings"
@@ -30,6 +32,12 @@ type (
Path []string `json:"path"`
Workers int `json:"workers"`
CheckerID int `json:"checker_id"`
+
+ Log struct {
+ Format string `json:"format"`
+ Level string `json:"level"`
+ Output string `json:"output"`
+ } `json:"log"`
}
States []int
@@ -47,27 +55,34 @@ func main() {
log.Fatalf("could not parse config: %s", err)
}
+ logger := parseLogger(config)
+
if err := os.Setenv("PATH", strings.Join(config.Path, ":")); err != nil {
- log.Fatalf("could not set PATH: %s", err)
+ logger.Error("could not set PATH", "error", err, "configured path", config.Path)
+ os.Exit(1)
}
waitDuration, err := time.ParseDuration(config.Wait)
if err != nil {
- log.Fatalf("could not parse wait duration: %s", err)
+ logger.Error("could not parse wait duration", "error", err, "wait duration", config.Wait)
+ os.Exit(1)
}
timeout, err := time.ParseDuration(config.Timeout)
if err != nil {
- log.Fatalf("could not parse timeout: %s", err)
+ logger.Error("could not parse timeout", "error", err, "timeout", config.Timeout)
+ os.Exit(1)
}
db, err := sql.Open("postgres", config.DB)
if err != nil {
- log.Fatalf("could not open database connection: %s", err)
+ logger.Error("could not open database connection", "error", err)
+ os.Exit(1)
}
hostname, err := os.Hostname()
if err != nil {
- log.Fatalf("could not resolve hostname: %s", err)
+ logger.Error("could not resolve hostname", "error", err)
+ os.Exit(1)
}
checker, err := monzero.NewChecker(monzero.CheckerConfig{
@@ -78,22 +93,23 @@ func main() {
Executor: monzero.CheckExec,
})
if err != nil {
- log.Fatalf("could not create checker instance: %s", err)
+ logger.Error("could not create checker instance", "error", err)
+ os.Exit(1)
}
for i := 0; i < config.Workers; i++ {
- go check(checker, waitDuration)
+ go check(checker, waitDuration, logger)
}
wg := sync.WaitGroup{}
wg.Add(1)
wg.Wait()
}
-func check(checker *monzero.Checker, waitDuration time.Duration) {
+func check(checker *monzero.Checker, waitDuration time.Duration, logger *slog.Logger) {
for {
if err := checker.Next(); err != nil {
if err != monzero.ErrNoCheck {
- log.Printf("could not run check: %s", err)
+ logger.Info("check returned error", "error", err)
}
time.Sleep(waitDuration)
}
@@ -171,3 +187,46 @@ func (s *States) ToOK() bool {
}
return false
}
+
+// parseLogger builds the slog logger from config.Log (output target, minimum level, handler format); invalid settings abort via log.Fatalf.
+func parseLogger(config Config) *slog.Logger {
+ var output io.Writer
+ switch config.Log.Output {
+ case "", "stderr": // an empty value selects stderr
+ output = os.Stderr
+ case "stdout":
+ output = os.Stdout
+ default: // any other value is used as a file path
+ var err error
+ output, err = os.OpenFile(config.Log.Output, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0640) // created if missing, opened write-only in append mode
+ if err != nil {
+ log.Fatalf("could not open log file handler: %s", err) // the slog logger is not built yet, so the standard log package reports this
+ }
+ }
+
+ var level slog.Level
+ switch config.Log.Level {
+ case "debug":
+ level = slog.LevelDebug
+ case "", "info": // an empty value selects info
+ level = slog.LevelInfo
+ case "warn":
+ level = slog.LevelWarn
+ case "error":
+ level = slog.LevelError
+ default:
+ log.Fatalf("unknown log level '%s', only 'debug', 'info', 'warn' and 'error' are supported", config.Log.Level)
+ }
+
+ var handler slog.Handler
+ switch config.Log.Format {
+ case "", "text": // an empty value selects the text handler
+ handler = slog.NewTextHandler(output, &slog.HandlerOptions{Level: level})
+ case "json":
+ handler = slog.NewJSONHandler(output, &slog.HandlerOptions{Level: level})
+ default:
+ log.Fatalf("unknown log format '%s', only 'text' and 'json' are supported", config.Log.Format)
+ }
+
+ return slog.New(handler)
+}
diff --git a/man/moncheck.1 b/man/moncheck.1
new file mode 100644
index 0000000..ede4990
--- /dev/null
+++ b/man/moncheck.1
@@ -0,0 +1,103 @@
+.TH moncheck 1
+.SH NAME
+moncheck \- runs shell commands as checks
+.SH SYNOPSIS
+.B moncheck
+[\fB\-\-config\fR \fIPATH\fR]
+.SH DESCRIPTION
+.B moncheck
+runs commands as checks and reports the result back into the database.
+.SH OPTIONS
+.TP
+.BR \-\-config =\fIPATH\fR
+Use the specified config file.
+.SH CONFIGURATION
+The configuration file must be formatted in json.
+Known keys and their effect are as follows:
+.TP
+.BR checker_id
+The \fIchecker_id\fR is required and is used to look up which checks should be
+run by the monzero instance.
+It is okay to run multiple instances with the same \fIchecker_id\fR, as the scheduling
+and locking is done in the database.
+
+.TP
+.BR db
+Set the database connection parameters to the postgres database. When using a
+separate user, the user must have \fBwrite\fR permissions on the tables \fIactive_checks\fR,
+\fInotifications\fR.
+
+\fBread\fR permissions are required on the tables \fIchecks_notify\fR, \fImapping_level\fR.
+
+Options to use are \fIuser\fR, \fIdbname\fR, \fIhost\fR, \fIport\fR, \fIpassword\fR.
+
+.TP
+.BR log
+By default the log output goes to stderr in a human readable format.
+It can be adjusted via \fIoutput\fR to write to a file or to one of \fIstdout\fR
+or \fIstderr\fR.
+
+Using \fIlevel\fR with either one of \fIdebug\fR, \fIinfo\fR, \fIwarn\fR, \fIerror\fR
+it is possible to limit the output.
+
+By adjusting the \fIformat\fR the output can be changed from \fItext\fR to \fIjson\fR
+to get machine readable log output.
+
+.BR example
+
+.nf
+.RS
+{
+ "format": "text",
+ "level": "info",
+ "output": "stderr"
+}
+.RE
+.fi
+
+.TP
+.BR path " - " \fRdefault: []
+Set a number of lookup paths that can be used to lookup check commands on the
+filesystem.
+
+.TP
+.BR timeout " - " \fRdefault: 30s
+The timeout decides the maximum time limit a command is allowed to run. When choosing
+longer timeouts be aware that timeouts can lead to more waiting checks.
+
+.TP
+.BR wait " - " \fRdefault: 30s
+The wait duration sets the time to wait between two checks and can be used to
+lower database traffic or used CPU.
+
+.TP
+.BR workers " - " \fRdefault: 25
+Set the number of workers that run check commands in parallel. The more parallel
+workers there are, the higher the lock contention on the database will become,
+but at the same time long running checks will have less of an impact on the
+number of waiting checks.
+
+Tune this value according to your available resources, foremost CPU cores.
+
+.SH CHECK COMMAND
+
+A \fIcheck command\fR has to implement the nagios API of a check command.
+
+1. It must return a message on stdout
+
+2. It must have an exit code to show the severity level
+
+.RS
+0 - check was a success
+
+1 - the check ended in a warning
+
+2 - the check ended in an error
+
+3 - the check is in an unknown state
+.RE
+
+If a check runs for too long it can be caught by the timeout. Care should be taken,
+though, that checks don't take too much time as the check interval only starts
+after the check ended, which can lead to fewer checks done in a time period than
+expected.
diff --git a/moncheck.conf.example b/moncheck.conf.example
index 29f5d81..4694ef0 100644
--- a/moncheck.conf.example
+++ b/moncheck.conf.example
@@ -9,5 +9,10 @@
"/usr/bin",
"/usr/sbin"
],
- "workers": 25
+ "workers": 25,
+ "log": {
+ "format": "text",
+ "level": "info",
+ "output": "stderr"
+ }
}