diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml
index eb58477..39dd928 100644
--- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml
+++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml
@@ -1,4 +1,16 @@
 groups:
+  - name: Generic
+    rules:
+      # Fires per (host, job) when `up` changed more than 5 times in 24h for any
+      # org="fux" job except integrations/unix; max keeps the count as $value.
+      - alert: HostJobFlaky
+        expr: max by (host, job) (changes(up{org="fux", job!="integrations/unix"}[24h])) > 5
+        for: 0m
+        labels:
+          severity: info
+        annotations:
+          summary: Job {{ $labels.job }} flaky (host {{ $labels.host }})
+          description: "The job {{ $labels.job }} on target: {{ $labels.host }} has been flaky over the last 24 hours.\n VALUE = {{ $value }}"
   - name: SNMP
     rules:
       - alert: SnmpTargetMissing
@@ -8,15 +20,7 @@ groups:
           severity: critical
         annotations:
           summary: SNMP target missing (instance {{ $labels.instance }})
-          description: "A SNMP target has disappeared for more the 30 min.\n VALUE = {{ $value }}"
-      - alert: SnmpTargetFalky
-        expr: changes(up{job=~"snmp"}[24h]) > 5
-        for: 0m
-        labels:
-          severity: info
-        annotations:
-          summary: SNMP target flaky (instance {{ $labels.instance }})
-          description: "A SNMP target is has a flaky respons over the last 24 hours.\n VALUE = {{ $value }}"
+          description: "A SNMP target has disappeared for more than 15 min.\n VALUE = {{ $value }}"
   - name: DHCP
     rules:
       - alert: DhcpFuxSharedFailed