From fce4c2f73b49ab34f3061edd91a7e3c569b7823e Mon Sep 17 00:00:00 2001 From: June <june@jsts.xyz> Date: Tue, 18 Feb 2025 15:38:07 +0100 Subject: [PATCH] grafana(host): account in Prom. hyperv. disk alerts for longer backups Set duration for Prometheus hypervisor disk read/write rate and hard disk I/O alerts to 2h to account for the very long-running (over 90m) backup job. --- .../grafana/docker_compose/prometheus_alerts.rules.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index f684385..5ec53b8 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -166,7 +166,7 @@ groups: # Longer intervals to account for disk intensive hypervisor tasks (backups, moving VMs, etc.). - alert: HypervisorHostUnusualDiskReadRate expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 90m + for: 2h labels: severity: warning annotations: @@ -174,7 +174,7 @@ groups: description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}" - alert: HypervisorHostUnusualDiskWriteRate expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 90m + for: 2h labels: severity: warning annotations: @@ -256,7 +256,7 @@ groups: # Since hard disks on the hypervisor can easily have their IO saturated by hypervisor tasks (backups, moving VMs, etc.), alert when the IO is above the regular threshold for a very long time. 
- alert: HypervisorHostUnusualHardDiskIo expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 90m + for: 2h labels: severity: warning annotations: