grafana: get alertmanager to be more chill
Some checks are pending
/ Ansible Lint (push) Waiting to run
Some checks are pending
/ Ansible Lint (push) Waiting to run
a bit of help to deal with alert fatigue
This commit is contained in:
parent
8f7990acc0
commit
11779ab21d
2 changed files with 8 additions and 19 deletions
|
|
@@ -7,7 +7,7 @@ route:
|
||||||
group_by: [ "alertname", "site", "type", "hypervisor" ]
|
group_by: [ "alertname", "site", "type", "hypervisor" ]
|
||||||
group_wait: 30s
|
group_wait: 30s
|
||||||
group_interval: 5m
|
group_interval: 5m
|
||||||
repeat_interval: 6h
|
repeat_interval: 26h
|
||||||
routes:
|
routes:
|
||||||
- receiver: "null"
|
- receiver: "null"
|
||||||
matchers:
|
matchers:
|
||||||
|
|
@@ -16,49 +16,38 @@ route:
|
||||||
matchers:
|
matchers:
|
||||||
- org = "ccchh"
|
- org = "ccchh"
|
||||||
- severity = "critical",
|
- severity = "critical",
|
||||||
repeat_interval: 18h
|
repeat_interval: 26h
|
||||||
continue: true
|
continue: true
|
||||||
- receiver: ntfy-ccchh
|
- receiver: ntfy-ccchh
|
||||||
matchers:
|
matchers:
|
||||||
- org = "ccchh"
|
- org = "ccchh"
|
||||||
- severity =~ "info|warning",
|
- severity =~ "info|warning",
|
||||||
repeat_interval: 36h
|
repeat_interval: 52h
|
||||||
continue: true
|
continue: true
|
||||||
- receiver: ntfy-fux-critical
|
- receiver: ntfy-fux-critical
|
||||||
matchers:
|
matchers:
|
||||||
- org = "fux"
|
- org = "fux"
|
||||||
- severity = "critical",
|
- severity = "critical",
|
||||||
repeat_interval: 18h
|
repeat_interval: 26h
|
||||||
continue: true
|
continue: true
|
||||||
- receiver: email-fux-critical
|
- receiver: email-fux-critical
|
||||||
matchers:
|
matchers:
|
||||||
- org = "fux"
|
- org = "fux"
|
||||||
- severity = "critical",
|
- severity = "critical",
|
||||||
repeat_interval: 36h
|
repeat_interval: 52h
|
||||||
continue: true
|
continue: true
|
||||||
- receiver: ntfy-fux
|
- receiver: ntfy-fux
|
||||||
matchers:
|
matchers:
|
||||||
- org = "fux"
|
- org = "fux"
|
||||||
- severity =~ "info|warning",
|
- severity =~ "info|warning",
|
||||||
repeat_interval: 36h
|
repeat_interval: 52h
|
||||||
continue: true
|
continue: true
|
||||||
- receiver: ccchh-infrastructure-alerts
|
|
||||||
matchers:
|
|
||||||
- org = "ccchh"
|
|
||||||
- severity =~ "info|warning|critical"
|
|
||||||
|
|
||||||
templates:
|
templates:
|
||||||
- "/etc/alertmanager/templates/*.tmpl"
|
- "/etc/alertmanager/templates/*.tmpl"
|
||||||
|
|
||||||
receivers:
|
receivers:
|
||||||
- name: "null"
|
- name: "null"
|
||||||
- name: "ccchh-infrastructure-alerts"
|
|
||||||
telegram_configs:
|
|
||||||
- send_resolved: true
|
|
||||||
bot_token: {{ secret__alertmanager_telegram_bot_token }}
|
|
||||||
chat_id: -1002434372415
|
|
||||||
parse_mode: HTML
|
|
||||||
message: {{ "'{{ template \"alert-message.telegram.ccchh\" . }}'" }}
|
|
||||||
|
|
||||||
- name: "ntfy-ccchh-critical"
|
- name: "ntfy-ccchh-critical"
|
||||||
webhook_configs:
|
webhook_configs:
|
||||||
|
|
|
||||||
|
|
@@ -19,7 +19,7 @@ services:
|
||||||
- prom_data:/prometheus
|
- prom_data:/prometheus
|
||||||
|
|
||||||
alertmanager:
|
alertmanager:
|
||||||
image: docker.io/prom/alertmanager:v0.30.0
|
image: docker.io/prom/alertmanager:v0.30.1
|
||||||
container_name: alertmanager
|
container_name: alertmanager
|
||||||
command:
|
command:
|
||||||
- '--config.file=/etc/alertmanager/alertmanager.yaml'
|
- '--config.file=/etc/alertmanager/alertmanager.yaml'
|
||||||
|
|
@@ -59,7 +59,7 @@ services:
|
||||||
- /dev/null:/etc/prometheus/pve.yml
|
- /dev/null:/etc/prometheus/pve.yml
|
||||||
|
|
||||||
loki:
|
loki:
|
||||||
image: docker.io/grafana/loki:3.6.3
|
image: docker.io/grafana/loki:3.6.4
|
||||||
container_name: loki
|
container_name: loki
|
||||||
ports:
|
ports:
|
||||||
- 13100:3100
|
- 13100:3100
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue