Gibheer
af843d76e1
It would be nice to support multiple notification mechanisms with a way to disable all or part of them. So for now, this is bound to the check but may change again in the future. Apart from that, all notifications get written to the notification table, so that the notifiers can do their job and ACK their own notifications.
251 lines
5.7 KiB
Go
251 lines
5.7 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"database/sql"
|
|
"database/sql/driver"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/lib/pq"
|
|
)
|
|
|
|
var (
|
|
configPath = flag.String("config", "moncheck.conf", "path to the config file")
|
|
)
|
|
|
|
type (
|
|
Config struct {
|
|
DB string `json:"db"`
|
|
Timeout string `json:"timeout"`
|
|
Wait string `json:"wait"`
|
|
Path []string `json:"path"`
|
|
}
|
|
|
|
States []int
|
|
)
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
raw, err := ioutil.ReadFile(*configPath)
|
|
if err != nil {
|
|
log.Fatalf("could not read config: %s", err)
|
|
}
|
|
config := Config{Timeout: "30s", Wait: "30s"}
|
|
if err := json.Unmarshal(raw, &config); err != nil {
|
|
log.Fatalf("could not parse config: %s", err)
|
|
}
|
|
|
|
if err := os.Setenv("PATH", strings.Join(config.Path, ":")); err != nil {
|
|
log.Fatalf("could not set PATH: %s", err)
|
|
}
|
|
|
|
waitDuration, err := time.ParseDuration(config.Wait)
|
|
if err != nil {
|
|
log.Fatalf("could not parse wait duration: %s", err)
|
|
}
|
|
timeout, err := time.ParseDuration(config.Timeout)
|
|
if err != nil {
|
|
log.Fatalf("could not parse timeout: %s", err)
|
|
}
|
|
|
|
db, err := sql.Open("postgres", config.DB)
|
|
if err != nil {
|
|
log.Fatalf("could not open database connection: %s", err)
|
|
}
|
|
|
|
for i := 0; i < 25; i++ {
|
|
go check(i, db, waitDuration, timeout)
|
|
}
|
|
wg := sync.WaitGroup{}
|
|
wg.Add(1)
|
|
wg.Wait()
|
|
}
|
|
|
|
func check(thread int, db *sql.DB, waitDuration, timeout time.Duration) {
|
|
for {
|
|
tx, err := db.Begin()
|
|
if err != nil {
|
|
log.Printf("[%d] could not start transaction: %s", thread, err)
|
|
continue
|
|
}
|
|
rows, err := tx.Query(`select check_id, cmdLine, states, mapping_id
|
|
from active_checks
|
|
where next_time < now()
|
|
and enabled
|
|
order by next_time
|
|
for update skip locked
|
|
limit 1;`)
|
|
if err != nil {
|
|
log.Printf("[%d] could not start query: %s", thread, err)
|
|
tx.Rollback()
|
|
continue
|
|
}
|
|
var (
|
|
id int64
|
|
cmdLine []string
|
|
states States
|
|
mapId int
|
|
state int
|
|
)
|
|
found := false
|
|
for rows.Next() {
|
|
if err := rows.Err(); err != nil {
|
|
log.Printf("could not fetch row: %s", err)
|
|
tx.Rollback()
|
|
break
|
|
}
|
|
err := rows.Scan(&id, pq.Array(&cmdLine), &states, &mapId)
|
|
if err != nil {
|
|
log.Printf("could not scan values: %s", err)
|
|
tx.Rollback()
|
|
break
|
|
}
|
|
found = true
|
|
}
|
|
if !found {
|
|
time.Sleep(waitDuration)
|
|
tx.Rollback()
|
|
continue
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
|
cmd := exec.CommandContext(ctx, cmdLine[0], cmdLine[1:]...)
|
|
output := bytes.NewBuffer([]byte{})
|
|
cmd.Stdout = output
|
|
cmd.Stderr = output
|
|
err = cmd.Run()
|
|
if err != nil && ctx.Err() == context.DeadlineExceeded {
|
|
cancel()
|
|
state = 2
|
|
fmt.Fprintf(output, "check took longer than %s", timeout)
|
|
} else if err != nil && cmd.ProcessState == nil {
|
|
log.Printf("[%d] error running check: %s", id, err)
|
|
state = 3
|
|
} else if err != nil {
|
|
cancel()
|
|
status, ok := cmd.ProcessState.Sys().(syscall.WaitStatus)
|
|
if !ok {
|
|
log.Printf("[%d]error running check: %s", id, err)
|
|
state = 2
|
|
} else {
|
|
state = status.ExitStatus()
|
|
}
|
|
} else {
|
|
cancel()
|
|
state = 0
|
|
}
|
|
|
|
err = db.QueryRow(`select target
|
|
from mapping_level
|
|
where mapping_id = $1 and source = $2`, mapId, state).Scan(&state)
|
|
if err != nil {
|
|
log.Printf("[%d] could not fetch error mapping for check '%d': %s", thread, id, err)
|
|
tx.Rollback()
|
|
continue
|
|
}
|
|
states.Add(state)
|
|
msg := output.String()
|
|
|
|
if _, err := tx.Exec(`update active_checks ac
|
|
set next_time = now() + intval, states = $2, msg = $3, acknowledged = case when $4 then false else acknowledged end
|
|
where check_id = $1`, id, &states, &msg, states.ToOK()); err != nil {
|
|
log.Printf("[%d] could not update row '%d': %s", thread, id, err)
|
|
tx.Rollback()
|
|
continue
|
|
}
|
|
if _, err := tx.Exec(`insert into notifications(check_id, states, output, mapping_id, notifier_id)
|
|
select $1, $2, $3, $4, cn.notifier_id
|
|
from checks_notify cn
|
|
where cn.check_id = $1`, &id, &states, &msg, &mapId); err != nil {
|
|
log.Printf("[%d] could not create notification for '%d': %s", thread, id, err)
|
|
tx.Rollback()
|
|
continue
|
|
}
|
|
tx.Commit()
|
|
}
|
|
}
|
|
|
|
func (s *States) Value() (driver.Value, error) {
|
|
last := len(*s)
|
|
if last == 0 {
|
|
return "{}", nil
|
|
}
|
|
result := strings.Builder{}
|
|
_, err := result.WriteString("{")
|
|
if err != nil {
|
|
return "", fmt.Errorf("could not write to buffer: %s", err)
|
|
}
|
|
for i, state := range *s {
|
|
if _, err := fmt.Fprintf(&result, "%d", state); err != nil {
|
|
return "", fmt.Errorf("could not write to buffer: %s", err)
|
|
}
|
|
if i < last-1 {
|
|
if _, err := result.WriteString(","); err != nil {
|
|
return "", fmt.Errorf("could not write to buffer: %s", err)
|
|
}
|
|
}
|
|
}
|
|
if _, err := result.WriteString("}"); err != nil {
|
|
return "", fmt.Errorf("could not write to buffer: %s", err)
|
|
}
|
|
return result.String(), nil
|
|
}
|
|
|
|
func (s *States) Scan(src interface{}) error {
|
|
switch src := src.(type) {
|
|
case []byte:
|
|
tmp := bytes.Trim(src, "{}")
|
|
states := bytes.Split(tmp, []byte(","))
|
|
result := make([]int, len(states))
|
|
for i, state := range states {
|
|
var err error
|
|
result[i], err = strconv.Atoi(string(state))
|
|
if err != nil {
|
|
return fmt.Errorf("could not parse element %s: %s", state, err)
|
|
}
|
|
}
|
|
*s = result
|
|
return nil
|
|
default:
|
|
return fmt.Errorf("could not convert %T to states", src)
|
|
}
|
|
}
|
|
|
|
// Append prepends the new state before all others.
|
|
func (s *States) Add(state int) {
|
|
vals := *s
|
|
statePos := 5
|
|
if len(vals) < 6 {
|
|
statePos = len(vals)
|
|
}
|
|
*s = append([]int{state}, vals[:statePos]...)
|
|
return
|
|
}
|
|
|
|
// ToOK returns true when the state returns from != 0 to 0.
|
|
func (s *States) ToOK() bool {
|
|
vals := *s
|
|
if len(vals) == 0 {
|
|
return false
|
|
}
|
|
if len(vals) <= 1 {
|
|
return vals[0] == 0
|
|
}
|
|
if vals[0] == 0 && vals[1] > 0 {
|
|
return true
|
|
}
|
|
return false
|
|
}
|