# Patch summary: adds one alerting rule, `ProbeFailed`, to the Fux Prometheus
# rules file, directly after the preceding rule's annotations and before the
# `Fux-SNMP` group.
#
# What the added rule does, as written in the hunk below:
#   - expr: selects `probe_success{org="fux"}` series whose value is 0 and
#     collapses them with `group by(instance, job, ip)`, so only the
#     `instance`, `job` and `ip` labels remain on the resulting alert series.
#   - for: 1m, i.e. the expression has to keep matching for one minute.
#   - labels: attaches `severity: critical` and `org: fux` to the alert.
#   - annotations: summary/description are templated from `$labels.instance`
#     and `$labels.job`, both of which survive the `group by`.
#
# NOTE(review): `probe_success` is presumably the blackbox_exporter probe
# result metric; confirm against the scrape config.
# NOTE(review): this copy of the patch had its line breaks and indentation
# collapsed; the YAML indentation below is reconstructed from the usual
# rules-file layout. Verify it against the target file before applying.
diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml
index b1836a3..97de744 100644
--- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml
+++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml
@@ -10,6 +10,15 @@ groups:
         annotations:
           summary: Job {{ $labels.job }} flaky on (instance {{ $labels.instance }})
           description: "The job {{ $labels.job }} on target: {{ $labels.instance }} has been flaky over the last 24 hours."
+      - alert: ProbeFailed
+        expr: group by(instance, job, ip) (probe_success{org="fux"} == 0)
+        for: 1m
+        labels:
+          severity: critical
+          org: fux
+        annotations:
+          summary: "Probe failed for {{ $labels.instance }} (job: {{ $labels.job }})"
+          description: "The Probe: {{ $labels.job }} can not complete its job for {{ $labels.instance }}, this most likely means that the instance is unreachable."
   - name: Fux-SNMP
     rules:
       - alert: SnmpTargetMissing