grafana: make alerts better for fux
Some checks failed
/ Ansible Lint (push) Failing after 48s
/ Ansible Lint (pull_request) Failing after 49s

This commit is contained in:
chris 2025-09-01 01:25:11 +02:00
commit 068b261745
Signed by: c6ristian
SSH key fingerprint: SHA256:B3m+yzpaxGXSEcDBpPHfvza/DNC0wuX+CKMeGq8wgak
6 changed files with 47 additions and 4 deletions

View file

@ -0,0 +1,41 @@
groups:
# Generic rules evaluated against every `up` series labelled org="fux".
- name: Fux-Generic
  rules:
  # Flags an (instance, job) pair whose `up` value changed more than
  # 7 times within the trailing 24h window.
  - alert: HostJobFlaky
    expr: 'group by(instance, job) (changes(up{org="fux"}[24h]) > 7)'
    for: 0m  # no pending period
    labels:
      severity: info
      org: fux
    annotations:
      summary: "Job {{ $labels.job }} flaky on (instance {{ $labels.instance }})"
      description: 'The job {{ $labels.job }} on target: {{ $labels.instance }} has been flaky over the last 24 hours.'
# Availability rules for SNMP scrape targets in org "fux".
- name: Fux-SNMP
  rules:
  # Fires when a target of any job whose name contains "snmp" has
  # reported up == 0 for the whole 15m pending period.
  - alert: SnmpTargetMissing
    expr: up{job=~".*snmp.*", org="fux"} == 0
    for: 15m
    labels:
      severity: critical
      org: fux
    annotations:
      summary: SNMP target missing (instance {{ $labels.instance }})
      description: "SNMP target: {{ $labels.instance }} has disappeared for more than 15 min."
# DHCP health rules: each rule watches the script_success series of one
# DHCP check script and fires as soon as it reports 0.
# NOTE(review): these rules set no `org` label, unlike the other Fux
# groups in this file - confirm whether alert routing needs `org: fux`.
- name: Fux-DHCP
  rules:
  - alert: DhcpFuxSharedFailed
    expr: script_success{script="check_dhcp_fux_shared"} == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: DHCP for Fux Shared stopped working
      description: "No DHCP lease for the Fux Shared range was received"
  - alert: DhcpFuxAdminFailed
    # Select the series by its `script` label, the same way the shared-range
    # rule does; matching on a label named `script_success` selects nothing.
    expr: script_success{script="check_dhcp_fux_admin"} == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: DHCP for Fux Admin stopped working
      description: "No DHCP lease for the Fux Admin range was received"