# Prometheus alerting rules, organised into three groups:
#   Generic - scrape-target flapping detection
#   SNMP    - SNMP scrape targets that are down
#   DHCP    - script-based DHCP lease checks
groups:
  - name: Generic
    rules:
      # Fires for every (host, job) pair whose `up` series changed value more
      # than 5 times within the last 24 hours. The "integrations/unix" job is
      # excluded and only series labelled org="fux" are considered.
      - alert: HostJobFlaky
        # `group by(host, job)` keeps only the `host` and `job` labels, so the
        # annotations below may reference those two labels only. The `group`
        # aggregator always yields 1, so $value carries no extra information.
        expr: 'group by(host, job) (changes(up{org="fux", job!="integrations/unix"}[24h]) > 5)'
        for: 0m
        labels:
          severity: info
        annotations:
          summary: 'Job {{ $labels.job }} flaky on (host {{ $labels.host }})'
          description: "The job {{ $labels.job }} on target: {{ $labels.host }} has been flaky over the last 24 hours.\n VALUE = {{ $value }}"

  - name: SNMP
    rules:
      # Fires when a target whose job name ends in "snmp" or starts with
      # "SNMP" has reported up == 0 continuously for 15 minutes.
      - alert: SnmpTargetMissing
        expr: 'up{job=~".*snmp|SNMP.*"} == 0'
        for: 15m
        labels:
          severity: critical
        annotations:
          summary: 'SNMP target missing (instance {{ $labels.instance }})'
          description: "A SNMP target has disappeared for more than 15 min.\n VALUE = {{ $value }}"

  - name: DHCP
    rules:
      # Both rules fire immediately (for: 0m) when the `script_success` series
      # for the named check script reports 0.
      # NOTE(review): presumably `script_success` is emitted by a script
      # exporter running the check_dhcp_* scripts - confirm against the
      # scrape configuration.
      - alert: DhcpFuxSharedFailed
        expr: 'script_success{script="check_dhcp_fux_shared"} == 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: DHCP for Fux Shared stopped working
          description: "No DHCP lease for the Fux Shared range was received"

      - alert: DhcpFuxAdminFailed
        # Selects on the `script` label, matching DhcpFuxSharedFailed.
        expr: 'script_success{script="check_dhcp_fux_admin"} == 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: DHCP for Fux Admin stopped working
          description: "No DHCP lease for the Fux Admin range was received"