Compare commits
No commits in common. "7f9d282155a1cfea158adac3e5fd1d520ec10905" and "13a8dc9b6f6205b7be2353bb7bb8a3e520cd7f99" have entirely different histories.
7f9d282155
...
13a8dc9b6f
3 changed files with 2 additions and 43 deletions
|
@@ -10,8 +10,6 @@ docker_compose__configuration_files:
|
||||||
content: "{{ lookup('ansible.builtin.template', 'resources/chaosknoten/grafana/docker_compose/alertmanager.yaml.j2') }}"
|
content: "{{ lookup('ansible.builtin.template', 'resources/chaosknoten/grafana/docker_compose/alertmanager.yaml.j2') }}"
|
||||||
- name: prometheus_alerts.rules.yaml
|
- name: prometheus_alerts.rules.yaml
|
||||||
content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml') }}"
|
content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml') }}"
|
||||||
- name: prometheus_alerts-fux.rules.yaml
|
|
||||||
content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/grafana/docker_compose/prometheus_alerts-fux.rules.yaml') }}"
|
|
||||||
- name: alertmanager_alert_templates.tmpl
|
- name: alertmanager_alert_templates.tmpl
|
||||||
content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/grafana/docker_compose/alertmanager_alert_templates.tmpl') }}"
|
content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/grafana/docker_compose/alertmanager_alert_templates.tmpl') }}"
|
||||||
- name: loki.yaml
|
- name: loki.yaml
|
||||||
|
|
|
@@ -1,39 +0,0 @@
|
||||||
groups:
|
|
||||||
- name: Generic
|
|
||||||
rules:
|
|
||||||
- alert: HostJobFlaky
|
|
||||||
expr: group by(host, job) (changes(up{org="fux", job!="integrations/unix"}[24h]) > 5)
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: info
|
|
||||||
annotations:
|
|
||||||
# The expr aggregates with `group by(host, job)`, so only the `host` and `job` labels exist here; use $labels.host.
summary: Job {{ $labels.job }} flaky on (host {{ $labels.host }})
|
|
||||||
description: "The job {{ $labels.job }} on target: {{ $labels.host }} has been flaky over the last 24 hours.\n VALUE = {{ $value }}"
|
|
||||||
- name: SNMP
|
|
||||||
rules:
|
|
||||||
- alert: SnmpTargetMissing
|
|
||||||
expr: up{job=~".*snmp|SNMP.*"} == 0
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: SNMP target missing (instance {{ $labels.instance }})
|
|
||||||
description: "A SNMP target has disappeared for more than 15 min.\n VALUE = {{ $value }}"
|
|
||||||
- name: DHCP
|
|
||||||
rules:
|
|
||||||
- alert: DhcpFuxSharedFailed
|
|
||||||
expr: script_success{script="check_dhcp_fux_shared"} == 0
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: DHCP for Fux Shared stopped working
|
|
||||||
description: "No DHCP lease for the Fux Shared range was received"
|
|
||||||
- alert: DhcpFuxAdminFailed
|
|
||||||
# Select the check by its `script` label, as DhcpFuxSharedFailed does; `script_success` is the metric name.
expr: script_success{script="check_dhcp_fux_admin"} == 0
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: DHCP for Fux Admin stopped working
|
|
||||||
description: "No DHCP lease for the Fux Admin range was received"
|
|
|
@@ -410,7 +410,7 @@ groups:
|
||||||
summary: Prometheus job missing (instance {{ $labels.instance }})
|
summary: Prometheus job missing (instance {{ $labels.instance }})
|
||||||
description: "A Prometheus job has disappeared\n VALUE = {{ $value }}"
|
description: "A Prometheus job has disappeared\n VALUE = {{ $value }}"
|
||||||
- alert: PrometheusTargetMissing
|
- alert: PrometheusTargetMissing
|
||||||
expr: up{job!~"snmp|noc_room_temp"} == 0
|
expr: up == 0
|
||||||
for: 0m
|
for: 0m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
|
@@ -418,7 +418,7 @@ groups:
|
||||||
summary: Prometheus target missing (instance {{ $labels.instance }})
|
summary: Prometheus target missing (instance {{ $labels.instance }})
|
||||||
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}"
|
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}"
|
||||||
- alert: PrometheusAllTargetsMissing
|
- alert: PrometheusAllTargetsMissing
|
||||||
expr: sum by (job) (up{job!~"snmp|noc_room_temp"}) == 0
|
expr: sum by (job) (up) == 0
|
||||||
for: 0m
|
for: 0m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue