moncheck - add mapping levels

This adds mapping levels to the check runner. When a check returns its
exit code, the actual level is looked up using the configured mapping.
The mapped state is then entered into the table and added to the
notifications in the same way.
This commit is contained in:
Gibheer 2018-12-11 13:02:23 +01:00
parent dece1ac2dc
commit 24456400eb

View File

@ -81,7 +81,13 @@ func check(thread int, db *sql.DB, waitDuration, timeout time.Duration) {
log.Printf("[%d] could not start transaction: %s", thread, err) log.Printf("[%d] could not start transaction: %s", thread, err)
continue continue
} }
rows, err := tx.Query("select check_id, cmdLine, states, notify from active_checks where next_time < now() and enabled order by next_time for update skip locked limit 1;") rows, err := tx.Query(`select check_id, cmdLine, states, notify, mapping_id
from active_checks
where next_time < now()
and enabled
order by next_time
for update skip locked
limit 1;`)
if err != nil { if err != nil {
log.Printf("[%d] could not start query: %s", thread, err) log.Printf("[%d] could not start query: %s", thread, err)
tx.Rollback() tx.Rollback()
@ -92,6 +98,8 @@ func check(thread int, db *sql.DB, waitDuration, timeout time.Duration) {
cmdLine []string cmdLine []string
states States states States
notify bool notify bool
mapId int
state int
) )
found := false found := false
for rows.Next() { for rows.Next() {
@ -100,7 +108,8 @@ func check(thread int, db *sql.DB, waitDuration, timeout time.Duration) {
tx.Rollback() tx.Rollback()
break break
} }
if err := rows.Scan(&id, pq.Array(&cmdLine), &states, &notify); err != nil { err := rows.Scan(&id, pq.Array(&cmdLine), &states, &notify, &mapId)
if err != nil {
log.Printf("could not scan values: %s", err) log.Printf("could not scan values: %s", err)
tx.Rollback() tx.Rollback()
break break
@ -120,31 +129,37 @@ func check(thread int, db *sql.DB, waitDuration, timeout time.Duration) {
err = cmd.Run() err = cmd.Run()
if err != nil && ctx.Err() == context.DeadlineExceeded { if err != nil && ctx.Err() == context.DeadlineExceeded {
cancel() cancel()
// TODO which state to choose? state = 2
// TODO add notification handler
// TODO all this casting should be done better
states.Add(99)
fmt.Fprintf(output, "check took longer than %s", timeout) fmt.Fprintf(output, "check took longer than %s", timeout)
} else if err != nil && cmd.ProcessState == nil { } else if err != nil && cmd.ProcessState == nil {
log.Printf("[%d] error running check: %s", id, err) log.Printf("[%d] error running check: %s", id, err)
states.Add(1) state = 3
} else if err != nil { } else if err != nil {
cancel() cancel()
status, ok := cmd.ProcessState.Sys().(syscall.WaitStatus) status, ok := cmd.ProcessState.Sys().(syscall.WaitStatus)
if !ok { if !ok {
log.Printf("[%d]error running check: %s", id, err) log.Printf("[%d]error running check: %s", id, err)
states.Add(1) state = 2
} else { } else {
log.Printf("%s", cmd.ProcessState.String()) state = status.ExitStatus()
states.Add(status.ExitStatus())
} }
} else { } else {
cancel() cancel()
states.Add(0) state = 0
} }
err = db.QueryRow(`select target
from mapping_level
where mapping_id = $1 and source = $2`, mapId, state).Scan(&state)
if err != nil {
log.Printf("[%d] could not fetch error mapping for check '%d': %s", thread, id, err)
tx.Rollback()
continue
}
states.Add(state)
msg := output.String() msg := output.String()
if _, err := tx.Exec(`update active_checks if _, err := tx.Exec(`update active_checks ac
set next_time = now() + intval, states = $2, msg = $3, acknowledged = case when $4 then false else acknowledged end set next_time = now() + intval, states = $2, msg = $3, acknowledged = case when $4 then false else acknowledged end
where check_id = $1`, id, &states, &msg, states.ToOK()); err != nil { where check_id = $1`, id, &states, &msg, states.ToOK()); err != nil {
log.Printf("[%d] could not update row '%d': %s", thread, id, err) log.Printf("[%d] could not update row '%d': %s", thread, id, err)