grafana: get alertmanager to be more chill
Some checks are pending
/ Ansible Lint (push) Waiting to run
Some checks are pending
/ Ansible Lint (push) Waiting to run
a bit of help to deal with alert fatigue
This commit is contained in:
parent
8f7990acc0
commit
11779ab21d
2 changed files with 8 additions and 19 deletions
|
|
@@ -7,7 +7,7 @@ route:
|
|||
group_by: [ "alertname", "site", "type", "hypervisor" ]
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 6h
|
||||
repeat_interval: 26h
|
||||
routes:
|
||||
- receiver: "null"
|
||||
matchers:
|
||||
|
|
@@ -16,49 +16,38 @@ route:
|
|||
matchers:
|
||||
- org = "ccchh"
|
||||
- severity = "critical",
|
||||
repeat_interval: 18h
|
||||
repeat_interval: 26h
|
||||
continue: true
|
||||
- receiver: ntfy-ccchh
|
||||
matchers:
|
||||
- org = "ccchh"
|
||||
- severity =~ "info|warning",
|
||||
repeat_interval: 36h
|
||||
repeat_interval: 52h
|
||||
continue: true
|
||||
- receiver: ntfy-fux-critical
|
||||
matchers:
|
||||
- org = "fux"
|
||||
- severity = "critical",
|
||||
repeat_interval: 18h
|
||||
repeat_interval: 26h
|
||||
continue: true
|
||||
- receiver: email-fux-critical
|
||||
matchers:
|
||||
- org = "fux"
|
||||
- severity = "critical",
|
||||
repeat_interval: 36h
|
||||
repeat_interval: 52h
|
||||
continue: true
|
||||
- receiver: ntfy-fux
|
||||
matchers:
|
||||
- org = "fux"
|
||||
- severity =~ "info|warning",
|
||||
repeat_interval: 36h
|
||||
repeat_interval: 52h
|
||||
continue: true
|
||||
- receiver: ccchh-infrastructure-alerts
|
||||
matchers:
|
||||
- org = "ccchh"
|
||||
- severity =~ "info|warning|critical"
|
||||
|
||||
templates:
|
||||
- "/etc/alertmanager/templates/*.tmpl"
|
||||
|
||||
receivers:
|
||||
- name: "null"
|
||||
- name: "ccchh-infrastructure-alerts"
|
||||
telegram_configs:
|
||||
- send_resolved: true
|
||||
bot_token: {{ secret__alertmanager_telegram_bot_token }}
|
||||
chat_id: -1002434372415
|
||||
parse_mode: HTML
|
||||
message: {{ "'{{ template \"alert-message.telegram.ccchh\" . }}'" }}
|
||||
|
||||
- name: "ntfy-ccchh-critical"
|
||||
webhook_configs:
|
||||
|
|
|
|||
|
|
@@ -19,7 +19,7 @@ services:
|
|||
- prom_data:/prometheus
|
||||
|
||||
alertmanager:
|
||||
image: docker.io/prom/alertmanager:v0.30.0
|
||||
image: docker.io/prom/alertmanager:v0.30.1
|
||||
container_name: alertmanager
|
||||
command:
|
||||
- '--config.file=/etc/alertmanager/alertmanager.yaml'
|
||||
|
|
@@ -59,7 +59,7 @@ services:
|
|||
- /dev/null:/etc/prometheus/pve.yml
|
||||
|
||||
loki:
|
||||
image: docker.io/grafana/loki:3.6.3
|
||||
image: docker.io/grafana/loki:3.6.4
|
||||
container_name: loki
|
||||
ports:
|
||||
- 13100:3100
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue