diff --git a/playbooks/files/chaosknoten/configs/grafana/docker_compose/prometheus_alerts.rules.yaml b/playbooks/files/chaosknoten/configs/grafana/docker_compose/prometheus_alerts.rules.yaml index 284c7ec..e638248 100644 --- a/playbooks/files/chaosknoten/configs/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/playbooks/files/chaosknoten/configs/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -21,13 +21,13 @@ groups: description: "The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}" # You may want to increase the alert manager 'repeat_interval' for this type of alert to daily or weekly - alert: HostMemoryIsUnderutilized - expr: (100 - (avg_over_time(node_memory_MemAvailable_bytes[30m]) / node_memory_MemTotal_bytes * 100) < 20) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + expr: (100 - (avg_over_time(node_memory_MemAvailable_bytes[30m]) / node_memory_MemTotal_bytes * 100) < 10) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 1w labels: severity: info annotations: summary: Host Memory is underutilized (instance {{ $labels.instance }}) - description: "Node memory is < 20% for 1 week. Consider reducing memory space. (instance {{ $labels.instance }})\n VALUE = {{ $value }}" + description: "Node memory is < 10% for 1 week. Consider reducing memory space. (instance {{ $labels.instance }})\n VALUE = {{ $value }}" - alert: HostUnusualNetworkThroughputIn expr: (sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 5m