network-monitor/prometheus/alert.rules
Brian Christner 35ff636cdd
updated to latest version of stack
Signed-off-by: Brian Christner <brian.christner@gmail.com>
2018-02-01 07:54:56 +01:00

23 lines
628 B
Plaintext

# Prometheus alerting rules.
# Layout: groups -> list of rule groups -> rules -> list of alert rules.
# Each rule fires once `expr` has held continuously for the `for` duration.
groups:
  - name: example
    rules:
      # Alert for any instance that is unreachable for >2 minutes.
      # `up == 0` selects every scrape target whose `up` series is 0.
      - alert: service_down
        expr: up == 0
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."

      # Alert for any instance whose node_load1 stays above 0.5 for >2 minutes.
      # NOTE(review): the 0.5 threshold is an absolute value and is not scaled
      # by CPU count -- confirm it suits the monitored hosts.
      - alert: high_load
        expr: node_load1 > 0.5
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} under high load"
          description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."