grafana: set dur. for Prom. hyperv. disk rw rate and hdd io aler. to 90m

Set duration for Prometheus hypervisor disk rw rate and hard disk io
alerts to 90m to account for the very long running (over an hour) backup
job.
This commit is contained in:
June 2025-02-15 06:05:44 +01:00
parent 1bae6234ae
commit ac7e8bb6f2
Signed by: june
SSH key fingerprint: SHA256:o9EAq4Y9N9K0pBQeBTqhSDrND5E7oB+60ZNx0U1yPe0

View file

@ -166,7 +166,7 @@ groups:
# Longer intervals to account for disk intensive hypervisor tasks (backups, moving VMs, etc.).
- alert: HypervisorHostUnusualDiskReadRate
expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"}
for: 60m
for: 90m
labels:
severity: warning
annotations:
@ -174,7 +174,7 @@ groups:
description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}"
- alert: HypervisorHostUnusualDiskWriteRate
expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"}
for: 60m
for: 90m
labels:
severity: warning
annotations:
@ -256,7 +256,7 @@ groups:
# Since hard disks on the hypervisor can easily have their IO saturated by hypervisor tasks (backups, moving VMs, etc.), alert when the IO is above the regular threshold for a very long time.
- alert: HypervisorHostUnusualHardDiskIo
expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"}
for: 50m
for: 90m
labels:
severity: warning
annotations: