grafana: add email alerts, and fix some rules
Some checks failed
/ Ansible Lint (push) Failing after 2m0s

This commit is contained in:
chris 2025-06-10 21:22:53 +02:00
commit 0a50ee470a
Signed by: c6ristian
SSH key fingerprint: SHA256:B3m+yzpaxGXSEcDBpPHfvza/DNC0wuX+CKMeGq8wgak
4 changed files with 42 additions and 23 deletions

View file

@@ -627,7 +627,7 @@ groups:
summary: Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }})
description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures\n VALUE = {{ $value }}"
- alert: PrometheusTimeseriesCardinality
- expr: label_replace(count by(__name__) ({__name__=~".+"}), "name", "$1", "__name__", "(.+)") > 10000
+ expr: label_replace(count by(__name__) ({__name__=~".+"}), "name", "$1", "__name__", "(.+)") > 15000
for: 0m
labels:
severity: warning