diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index 5ec53b8..4aac2dc 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -196,9 +196,9 @@ groups: # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users. - alert: HostDiskWillFillIn24Hours expr: ((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 2m + for: 5m labels: - severity: warning + severity: critical annotations: summary: Host disk will fill in 24 hours (instance {{ $labels.instance }}) description: "Filesystem is predicted to run out of space within the next 24 hours at current write rate\n VALUE = {{ $value }}" @@ -212,9 +212,9 @@ groups: description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}" - alert: HostInodesWillFillIn24Hours expr: (node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{fstype!="msdosfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{fstype!="msdosfs"} == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 2m + for: 5m labels: - severity: warning + severity: critical annotations: summary: Host inodes will fill in 24 hours (instance {{ $labels.instance }}) description: "Filesystem is predicted to run out of inodes within the next 24 hours at current write rate\n VALUE = {{ $value }}" @@ -362,7 +362,7 @@ groups: expr: (node_systemd_unit_state{state="failed"} == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 0m labels: - severity: warning + severity: critical annotations: summary: Host systemd service crashed (instance {{ $labels.instance }}) description: "systemd service crashed\n VALUE = {{ $value }}"