custom alerts for CI VMs
It's expected for some VMs to have high read/write rates for some time, so this adds custom alerts for our CI VMs.
This commit is contained in:
parent
3284fae62a
commit
a386f9e2eb
1 changed files with 19 additions and 2 deletions
|
@ -46,7 +46,7 @@ groups:
|
|||
description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}"
|
||||
# Have different disk read and write rate alerts for VMs and physical machines.
|
||||
- alert: VirtualHostUnusualDiskReadRate
|
||||
expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{ype="virtual_machine", nodename=~".+"}
|
||||
# Generic VM read-rate rule (> 50 MiB/s summed per instance); the CI VMs
# forgejo-actions-runner and woodpecker are excluded by nodename.
# The selector label must be `type` (was misspelled `ype`, which matches no series).
expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~".+", nodename!="forgejo-actions-runner", nodename!="woodpecker"}
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -54,13 +54,30 @@ groups:
|
|||
summary: Virtual host unusual disk read rate (instance {{ $labels.instance }})
|
||||
description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}"
|
||||
- alert: VirtualHostUnusualDiskWriteRate
|
||||
expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~".+"}
|
||||
expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~".+", nodename!="forgejo-actions-runner", nodename!="woodpecker"}
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: Virtual host unusual disk write rate (instance {{ $labels.instance }})
|
||||
description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}"
|
||||
# Some VMs are expected to have high read/write rates, e.g. CI servers.
|
||||
- alert: VirtualHostUnusualDiskReadRate
|
||||
# CI-VM read-rate rule (> 50 MiB/s summed per instance).
# nodename uses a regex alternation: two `=` matchers on the same label can
# never both hold, so the previous selector matched nothing. Prometheus
# regex matchers are fully anchored, so only these two exact names match.
# The selector label must be `type` (was misspelled `ype`).
expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~"forgejo-actions-runner|woodpecker"}
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: Virtual host unusual disk read rate for 10 min (instance {{ $labels.instance }})
|
||||
description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}"
|
||||
- alert: VirtualHostUnusualDiskWriteRate
|
||||
# CI-VM write-rate rule (> 50 MiB/s summed per instance).
# nodename uses a regex alternation: two `=` matchers on the same label can
# never both hold, so the previous selector matched nothing. Prometheus
# regex matchers are fully anchored, so only these two exact names match.
expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~"forgejo-actions-runner|woodpecker"}
|
||||
for: 4m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: Virtual host unusual disk write rate for 4 min (instance {{ $labels.instance }})
|
||||
description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}"
|
||||
- alert: PhysicalHostUnusualDiskReadRate
|
||||
expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 100) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"}
|
||||
for: 20m
|
||||
|
|
Loading…
Reference in a new issue