From 5016407cefa03926da2cd1667692bffbe8240fa0 Mon Sep 17 00:00:00 2001 From: June Date: Thu, 6 Feb 2025 00:12:03 +0100 Subject: [PATCH 01/46] grafana: group prometheus alert rules for better organization --- .../prometheus_alerts.rules.yaml | 254 ++++++++++-------- 1 file changed, 135 insertions(+), 119 deletions(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index 65b3590..9b1ee26 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -1,7 +1,7 @@ # Links & Resources: # - https://samber.github.io/awesome-prometheus-alerts/rules groups: - - name: node-exporter + - name: node-exporter-memory rules: - alert: HostOutOfMemory expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} @@ -28,6 +28,41 @@ groups: annotations: summary: Host Memory is underutilized (instance {{ $labels.instance }}) description: "Node memory is < 10% for 1 week. Consider reducing memory space. 
(instance {{ $labels.instance }})\n VALUE = {{ $value }}" + - alert: HostSwapIsFillingUp + expr: ((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 2m + labels: + severity: warning + annotations: + summary: Host swap is filling up (instance {{ $labels.instance }}) + description: "Swap is filling up (>80%)\n VALUE = {{ $value }}" + - alert: HostOomKillDetected + expr: (increase(node_vmstat_oom_kill[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 0m + labels: + severity: warning + annotations: + summary: Host OOM kill detected (instance {{ $labels.instance }}) + description: "OOM kill detected\n VALUE = {{ $value }}" + - alert: HostEdacCorrectableErrorsDetected + expr: (increase(node_edac_correctable_errors_total[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 0m + labels: + severity: info + annotations: + summary: Host EDAC Correctable Errors detected (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}" + - alert: HostEdacUncorrectableErrorsDetected + expr: (node_edac_uncorrectable_errors_total > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 0m + labels: + severity: warning + annotations: + summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}" + + - name: node-exporter-network + rules: - alert: HostUnusualNetworkThroughputIn expr: (sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 5m @@ -44,6 +79,41 @@ 
groups: annotations: summary: Host unusual network throughput out (instance {{ $labels.instance }}) description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}" + - alert: HostNetworkReceiveErrors + expr: (rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 2m + labels: + severity: warning + annotations: + summary: Host Network Receive Errors (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.\n VALUE = {{ $value }}" + - alert: HostNetworkTransmitErrors + expr: (rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 2m + labels: + severity: warning + annotations: + summary: Host Network Transmit Errors (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last two minutes.\n VALUE = {{ $value }}" + - alert: HostNetworkBondDegraded + expr: ((node_bonding_active - node_bonding_slaves) != 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 2m + labels: + severity: warning + annotations: + summary: Host Network Bond Degraded (instance {{ $labels.instance }}) + description: "Bond \"{{ $labels.device }}\" degraded on \"{{ $labels.instance }}\".\n VALUE = {{ $value }}" + - alert: HostConntrackLimit + expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 5m + labels: + severity: warning + annotations: + summary: Host conntrack limit (instance {{ $labels.instance }}) + description: "The number of conntrack is 
approaching limit\n VALUE = {{ $value }}" + + - name: node-exporter-disk + rules: # Have different disk read and write rate alerts for VMs and physical machines. - alert: VirtualHostUnusualDiskReadRate expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{ype="virtual_machine", nodename=~".+", nodename!="forgejo-actions-runner", nodename!="woodpecker"} @@ -156,6 +226,50 @@ groups: annotations: summary: Host unusual disk write latency (instance {{ $labels.instance }}) description: "Disk latency is growing (write operations > 100ms)\n VALUE = {{ $value }}" + # Have different disk IO alerts for VMs and physical machines and for physical machines different ones for hard and other disks. + - alert: PhysicalHostUnusualHardDiskIo + expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.75) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} + for: 5m + labels: + severity: warning + annotations: + summary: Physical host unusual hard disk IO (instance {{ $labels.instance }}) + description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}" + - alert: PhysicalHostUnusualOtherDiskIo + expr: (rate(node_disk_io_time_seconds_total{device!~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} + for: 5m + labels: + severity: warning + annotations: + summary: Physical host unusual other (non-hard) disk IO (instance {{ $labels.instance }}) + description: "Time spent in IO is too high on {{ $labels.instance }}. 
Check storage for issues.\n VALUE = {{ $value }}" + - alert: VirtualHostUnusualDiskIo + expr: (rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~".+"} + for: 5m + labels: + severity: warning + annotations: + summary: Virtual host unusual disk IO (instance {{ $labels.instance }}) + description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}" + - alert: HostRaidArrayGotInactive + expr: (node_md_state{state="inactive"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 0m + labels: + severity: critical + annotations: + summary: Host RAID array got inactive (instance {{ $labels.instance }}) + description: "RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.\n VALUE = {{ $value }}" + - alert: HostRaidDiskFailure + expr: (node_md_disks{state="failed"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 2m + labels: + severity: warning + annotations: + summary: Host RAID disk failure (instance {{ $labels.instance }}) + description: "At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap\n VALUE = {{ $value }}" + + - name: node-exporter-cpu + rules: - alert: HostHighCpuLoad expr: (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) > 0.8) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 10m @@ -190,31 +304,6 @@ groups: annotations: summary: Host CPU high iowait (instance {{ $labels.instance }}) description: "CPU iowait > 10%. 
A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}" - # Have different disk IO alerts for VMs and physical machines and for physical machines different ones for hard and other disks. - - alert: PhysicalHostUnusualHardDiskIo - expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.75) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} - for: 5m - labels: - severity: warning - annotations: - summary: Physical host unusual hard disk IO (instance {{ $labels.instance }}) - description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}" - - alert: PhysicalHostUnusualOtherDiskIo - expr: (rate(node_disk_io_time_seconds_total{device!~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} - for: 5m - labels: - severity: warning - annotations: - summary: Physical host unusual other (non-hard) disk IO (instance {{ $labels.instance }}) - description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}" - - alert: VirtualHostUnusualDiskIo - expr: (rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~".+"} - for: 5m - labels: - severity: warning - annotations: - summary: Virtual host unusual disk IO (instance {{ $labels.instance }}) - description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}" # # x2 context switches is an arbitrary number. # # The alert threshold depends on the nature of the application. 
# # Please read: https://github.com/samber/awesome-prometheus-alerts/issues/58 @@ -226,14 +315,28 @@ groups: # annotations: # summary: Host context switching high (instance {{ $labels.instance }}) # description: "Context switching is growing on the node (twice the daily average during the last 15m)\n VALUE = {{ $value }}" - - alert: HostSwapIsFillingUp - expr: ((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 2m + + - name: node-exporter-physical + rules: + - alert: HostNodeOvertemperatureAlarm + expr: ((node_hwmon_temp_crit_alarm_celsius == 1) or (node_hwmon_temp_alarm == 1)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 0m + labels: + severity: critical + annotations: + summary: Host node overtemperature alarm (instance {{ $labels.instance }}) + description: "Physical node temperature alarm triggered\n VALUE = {{ $value }}" + - alert: HostKernelVersionDeviations + expr: (count(sum(label_replace(node_uname_info, "kernel", "$1", "release", "([0-9]+.[0-9]+.[0-9]+).*")) by (kernel)) > 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + for: 6h labels: severity: warning annotations: - summary: Host swap is filling up (instance {{ $labels.instance }}) - description: "Swap is filling up (>80%)\n VALUE = {{ $value }}" + summary: Host kernel version deviations (instance {{ $labels.instance }}) + description: "Different kernel versions are running\n VALUE = {{ $value }}" + + - name: node-exporter-misc + rules: - alert: HostSystemdServiceCrashed expr: (node_systemd_unit_state{state="failed"} == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 0m @@ -250,94 +353,6 @@ groups: annotations: summary: Host physical component too hot (instance {{ $labels.instance }}) description: "Physical hardware component too hot\n VALUE = {{ $value }}" - - alert: HostNodeOvertemperatureAlarm - expr: 
((node_hwmon_temp_crit_alarm_celsius == 1) or (node_hwmon_temp_alarm == 1)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 0m - labels: - severity: critical - annotations: - summary: Host node overtemperature alarm (instance {{ $labels.instance }}) - description: "Physical node temperature alarm triggered\n VALUE = {{ $value }}" - - alert: HostRaidArrayGotInactive - expr: (node_md_state{state="inactive"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 0m - labels: - severity: critical - annotations: - summary: Host RAID array got inactive (instance {{ $labels.instance }}) - description: "RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.\n VALUE = {{ $value }}" - - alert: HostRaidDiskFailure - expr: (node_md_disks{state="failed"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 2m - labels: - severity: warning - annotations: - summary: Host RAID disk failure (instance {{ $labels.instance }}) - description: "At least one device in RAID array on {{ $labels.instance }} failed. 
Array {{ $labels.md_device }} needs attention and possibly a disk swap\n VALUE = {{ $value }}" - - alert: HostKernelVersionDeviations - expr: (count(sum(label_replace(node_uname_info, "kernel", "$1", "release", "([0-9]+.[0-9]+.[0-9]+).*")) by (kernel)) > 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 6h - labels: - severity: warning - annotations: - summary: Host kernel version deviations (instance {{ $labels.instance }}) - description: "Different kernel versions are running\n VALUE = {{ $value }}" - - alert: HostOomKillDetected - expr: (increase(node_vmstat_oom_kill[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 0m - labels: - severity: warning - annotations: - summary: Host OOM kill detected (instance {{ $labels.instance }}) - description: "OOM kill detected\n VALUE = {{ $value }}" - - alert: HostEdacCorrectableErrorsDetected - expr: (increase(node_edac_correctable_errors_total[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 0m - labels: - severity: info - annotations: - summary: Host EDAC Correctable Errors detected (instance {{ $labels.instance }}) - description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}" - - alert: HostEdacUncorrectableErrorsDetected - expr: (node_edac_uncorrectable_errors_total > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 0m - labels: - severity: warning - annotations: - summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}) - description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}" - - alert: HostNetworkReceiveErrors - expr: (rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01) * on(instance) group_left 
(nodename) node_uname_info{nodename=~".+"} - for: 2m - labels: - severity: warning - annotations: - summary: Host Network Receive Errors (instance {{ $labels.instance }}) - description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.\n VALUE = {{ $value }}" - - alert: HostNetworkTransmitErrors - expr: (rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 2m - labels: - severity: warning - annotations: - summary: Host Network Transmit Errors (instance {{ $labels.instance }}) - description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last two minutes.\n VALUE = {{ $value }}" - - alert: HostNetworkBondDegraded - expr: ((node_bonding_active - node_bonding_slaves) != 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 2m - labels: - severity: warning - annotations: - summary: Host Network Bond Degraded (instance {{ $labels.instance }}) - description: "Bond \"{{ $labels.device }}\" degraded on \"{{ $labels.instance }}\".\n VALUE = {{ $value }}" - - alert: HostConntrackLimit - expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} - for: 5m - labels: - severity: warning - annotations: - summary: Host conntrack limit (instance {{ $labels.instance }}) - description: "The number of conntrack is approaching limit\n VALUE = {{ $value }}" - alert: HostClockSkew expr: ((node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 10m @@ -362,6 +377,7 @@ groups: annotations: summary: Host 
requires reboot (instance {{ $labels.instance }}) description: "{{ $labels.instance }} requires a reboot.\n VALUE = {{ $value }}" + - name: prometheus rules: - alert: PrometheusJobMissing From 9e77a41e3c299e6f2620b43f47d93dd3812314ae Mon Sep 17 00:00:00 2001 From: June Date: Thu, 6 Feb 2025 01:05:05 +0100 Subject: [PATCH 02/46] grafana: differentiate prometheus disk rate alerts by host task type Not by a mix of host task type (CI server or not) and whether or not the host is virtual or physical. Also only differentiate on the duration not the rate, to not accidentally exclude slow hard disks. --- .../prometheus_alerts.rules.yaml | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index 9b1ee26..8cfd99a 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -114,56 +114,60 @@ groups: - name: node-exporter-disk rules: - # Have different disk read and write rate alerts for VMs and physical machines. - - alert: VirtualHostUnusualDiskReadRate - expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{ype="virtual_machine", nodename=~".+", nodename!="forgejo-actions-runner", nodename!="woodpecker"} + # General high disk read and write rate alerts. 
+ # Excluding: hypervisor hosts, CI hosts + - alert: HostUnusualDiskReadRate + expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename=~".+", nodename!="forgejo-actions-runner", nodename!="woodpecker", nodename!="chaosknoten"} for: 5m labels: severity: warning annotations: - summary: Virtual host unusual disk read rate (instance {{ $labels.instance }}) + summary: Host unusual disk read rate (instance {{ $labels.instance }}) description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}" - - alert: VirtualHostUnusualDiskWriteRate - expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~".+", nodename!="forgejo-actions-runner", nodename!="woodpecker"} + - alert: HostUnusualDiskWriteRate + expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename=~".+", nodename!="forgejo-actions-runner", nodename!="woodpecker", nodename!="chaosknoten"} for: 2m labels: severity: warning annotations: - summary: Virtual host unusual disk write rate (instance {{ $labels.instance }}) + summary: Host unusual disk write rate (instance {{ $labels.instance }}) description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}" - # Some VMs are expected to have high Read / Write rates z.B. CI servers - - alert: VirtualHostUnusualDiskReadRate - expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{ype="virtual_machine", nodename="forgejo-actions-runner", nodename="woodpecker"} + # CI hosts high disk read and write alerts. + # Longer intervals to account for disk intensive CI tasks. 
+ - alert: CIHostUnusualDiskReadRate + expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="forgejo-actions-runner", nodename="woodpecker"} for: 10m labels: severity: warning annotations: - summary: Virtual host unusual disk read rate for 10 min (instance {{ $labels.instance }}) + summary: CI host unusual disk read rate for 10 min (instance {{ $labels.instance }}) description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}" - alert: VirtualHostUnusualDiskWriteRate - expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename="forgejo-actions-runner", nodename="woodpecker"} + expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="forgejo-actions-runner", nodename="woodpecker"} for: 4m labels: severity: warning annotations: - summary: Virtual host unusual disk write rate for 4 min (instance {{ $labels.instance }}) + summary: CI host unusual disk write rate for 4 min (instance {{ $labels.instance }}) description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}" - - alert: PhysicalHostUnusualDiskReadRate - expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 100) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} + # Hypervisor host high disk read and write alerts. + # Longer intervals to account for disk intensive hypervisor tasks (backups, moving VMs, etc.). 
+ - alert: HypervisorHostUnusualDiskReadRate + expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} + for: 30m + labels: + severity: warning + annotations: + summary: Hypervisor host unusual disk read rate (instance {{ $labels.instance }}) + description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}" + - alert: HypervisorHostUnusualDiskWriteRate + expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} for: 20m labels: severity: warning annotations: - summary: Physical host unusual disk read rate (instance {{ $labels.instance }}) - description: "Disk is probably reading too much data (> 100 MB/s)\n VALUE = {{ $value }}" - - alert: PhysicalHostUnusualDiskWriteRate - expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 100) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} - for: 15m - labels: - severity: warning - annotations: - summary: Physical host unusual disk write rate (instance {{ $labels.instance }}) - description: "Disk is probably writing too much data (> 100 MB/s)\n VALUE = {{ $value }}" + summary: Hypervisor host unusual disk write rate (instance {{ $labels.instance }}) + description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}" # Please add ignored mountpoints in node_exporter parameters like # "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)". # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users. From ee66631c2d348ba7e924353f7314c90dc8195a0b Mon Sep 17 00:00:00 2001 From: June Date: Thu, 6 Feb 2025 01:13:10 +0100 Subject: [PATCH 03/46] grafana: diff. 
prometheus disk io alerts by host task and disk type Differentiate by host task (hypervisor or not) and disk (hard disk or not) type not by whether or not the host is physical and virtual and then by disk type. This is in line with the disk rate alerts changes and allows for fine-grained adjustments based on the host task type, which actually matters for these alerts. --- .../prometheus_alerts.rules.yaml | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index 8cfd99a..8c8f374 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -230,30 +230,35 @@ groups: annotations: summary: Host unusual disk write latency (instance {{ $labels.instance }}) description: "Disk latency is growing (write operations > 100ms)\n VALUE = {{ $value }}" - # Have different disk IO alerts for VMs and physical machines and for physical machines different ones for hard and other disks. - - alert: PhysicalHostUnusualHardDiskIo - expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.75) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} + # General unusual disk io alerts. + # Excluding: hypervisor hosts + - alert: HostUnusualDiskIo + expr: (rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename=~".+", nodename!="chaosknoten"} for: 5m labels: severity: warning annotations: - summary: Physical host unusual hard disk IO (instance {{ $labels.instance }}) + summary: Host unusual disk IO (instance {{ $labels.instance }}) description: "Time spent in IO is too high on {{ $labels.instance }}. 
Check storage for issues.\n VALUE = {{ $value }}" - - alert: PhysicalHostUnusualOtherDiskIo - expr: (rate(node_disk_io_time_seconds_total{device!~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{type="physical_machine", nodename=~".+"} - for: 5m + # Hypervisor host unusual hard disk io alerts. + # Since hard disks on the hypervisor can easily have their IO saturated by hypervisor tasks (backups, moving VMs, etc.), alert when the IO is above the regular threshold for a very long time. + - alert: HypervisorHostUnusualHardDiskIo + expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} + for: 50m labels: severity: warning annotations: - summary: Physical host unusual other (non-hard) disk IO (instance {{ $labels.instance }}) + summary: Hypervisor host unusual hard disk IO (instance {{ $labels.instance }}) description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}" - - alert: VirtualHostUnusualDiskIo - expr: (rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{type="virtual_machine", nodename=~".+"} + # Hypervisor host unusual other (non-hard) disk io alerts. + # This is the same as the regular unusual disk io alerts. + - alert: HypervisorHostUnusualOtherDiskIo + expr: (rate(node_disk_io_time_seconds_total{device!~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} for: 5m labels: severity: warning annotations: - summary: Virtual host unusual disk IO (instance {{ $labels.instance }}) + summary: Hypervisor host unusual other (non-hard) disk IO (instance {{ $labels.instance }}) description: "Time spent in IO is too high on {{ $labels.instance }}. 
Check storage for issues.\n VALUE = {{ $value }}" - alert: HostRaidArrayGotInactive expr: (node_md_state{state="inactive"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} From c4e35c1adfcd700061f208458586d9f2c5f36a38 Mon Sep 17 00:00:00 2001 From: June Date: Thu, 6 Feb 2025 01:34:45 +0100 Subject: [PATCH 04/46] grafana: pull out prom. net. rec. err. alerts for OPNs. to ex. wg int. Pull out prometheus network receive error alerts for OPNsense to exclude its WireGuard interfaces, which like to throw errors, but which aren't of importance. --- .../docker_compose/prometheus_alerts.rules.yaml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index 8c8f374..3d9d7a1 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -79,14 +79,26 @@ groups: annotations: summary: Host unusual network throughput out (instance {{ $labels.instance }}) description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}" + # General network receive error alerts. 
+ # Excluding: OPNsense hosts - alert: HostNetworkReceiveErrors - expr: (rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} + expr: (rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+", nodename!="OPNsense"} for: 2m labels: severity: warning annotations: summary: Host Network Receive Errors (instance {{ $labels.instance }}) description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.\n VALUE = {{ $value }}" + # OPNsense network receive error alerts. + # This is the same as the regular network receive error alerts, but excluding the WireGuard interfaces as they like to throw errors, but which aren't of importance. + - alert: OPNsenseHostNetworkReceiveErrors + expr: (rate(node_network_receive_errs_total{device!~"wg.+"}[2m]) / rate(node_network_receive_packets_total{device!~"wg.+"}[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename="OPNsense"} + for: 2m + labels: + severity: warning + annotations: + summary: OPNsense host Network Receive Errors (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.\n VALUE = {{ $value }}" - alert: HostNetworkTransmitErrors expr: (rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 2m From 40cddb67b40a245d3416816a5fe330b152203d87 Mon Sep 17 00:00:00 2001 From: June Date: Thu, 6 Feb 2025 19:17:21 +0100 Subject: [PATCH 05/46] grafana: account for long backup jobs in Prom. hyperv. disk rw rate al. 
--- .../grafana/docker_compose/prometheus_alerts.rules.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index 3d9d7a1..5cc54e9 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -166,7 +166,7 @@ groups: # Longer intervals to account for disk intensive hypervisor tasks (backups, moving VMs, etc.). - alert: HypervisorHostUnusualDiskReadRate expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 30m + for: 60m labels: severity: warning annotations: @@ -174,7 +174,7 @@ groups: description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}" - alert: HypervisorHostUnusualDiskWriteRate expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 20m + for: 60m labels: severity: warning annotations: From bdbd9ce19505d974a76c4f6a5597e6d6ed813f9a Mon Sep 17 00:00:00 2001 From: June Date: Mon, 10 Feb 2025 23:40:39 +0100 Subject: [PATCH 06/46] eh22-wiki: setup EH22 wiki using Ansible by copying and mod. wiki config Also introduce wiki_hosts group for applying dokuwiki role to multiple hosts. 
--- .../chaosknoten/host_vars/eh22-wiki.yaml | 11 ++++ inventories/chaosknoten/hosts.yaml | 13 ++++ playbooks/deploy.yaml | 2 +- .../eh22-wiki/nginx/eh22.easterhegg.eu.conf | 66 +++++++++++++++++++ .../nginx/acme_challenge.conf | 2 +- .../public-reverse-proxy/nginx/nginx.conf | 2 +- 6 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 inventories/chaosknoten/host_vars/eh22-wiki.yaml create mode 100644 resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf diff --git a/inventories/chaosknoten/host_vars/eh22-wiki.yaml b/inventories/chaosknoten/host_vars/eh22-wiki.yaml new file mode 100644 index 0000000..a8814c0 --- /dev/null +++ b/inventories/chaosknoten/host_vars/eh22-wiki.yaml @@ -0,0 +1,11 @@ +nginx__version_spec: "" +nginx__configurations: + - name: eh22.easterhegg.eu + content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf') }}" + +certbot__version_spec: "" +certbot__acme_account_email_address: j+letsencrypt-ccchh@jsts.xyz +certbot__certificate_domains: + - "eh22.easterhegg.eu" +certbot__new_cert_commands: + - "systemctl reload nginx.service" diff --git a/inventories/chaosknoten/hosts.yaml b/inventories/chaosknoten/hosts.yaml index 432f357..911a87d 100644 --- a/inventories/chaosknoten/hosts.yaml +++ b/inventories/chaosknoten/hosts.yaml @@ -10,6 +10,10 @@ all: ansible_host: cloud-intern.hamburg.ccc.de ansible_user: chaos ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + eh22-wiki: + ansible_host: eh22-wiki-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de grafana: ansible_host: grafana-intern.hamburg.ccc.de ansible_user: chaos @@ -55,6 +59,7 @@ all: hosts: ccchoir: cloud: + eh22-wiki: grafana: keycloak: lists: @@ -83,6 +88,7 @@ all: nginx_hosts: hosts: ccchoir: + eh22-wiki: grafana: tickets: keycloak: @@ -100,6 +106,7 @@ all: certbot_hosts: hosts: ccchoir: + eh22-wiki: grafana: tickets: 
keycloak: @@ -113,6 +120,7 @@ all: prometheus_node_exporter_hosts: hosts: ccchoir: + eh22-wiki: tickets: keycloak: onlyoffice: @@ -123,6 +131,7 @@ all: infrastructure_authorized_keys_hosts: hosts: ccchoir: + eh22-wiki: grafana: tickets: cloud: @@ -133,3 +142,7 @@ all: public-reverse-proxy: wiki: zammad: + wiki_hosts: + hosts: + eh22-wiki: + wiki: diff --git a/playbooks/deploy.yaml b/playbooks/deploy.yaml index 91cdf0f..6955b02 100644 --- a/playbooks/deploy.yaml +++ b/playbooks/deploy.yaml @@ -25,7 +25,7 @@ - foobazdmx - name: Ensure Dokuwiki config - hosts: wiki + hosts: wiki_hosts roles: - dokuwiki diff --git a/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf new file mode 100644 index 0000000..631ba7d --- /dev/null +++ b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf @@ -0,0 +1,66 @@ +# partly generated 2022-01-08, Mozilla Guideline v5.6, nginx 1.17.7, OpenSSL 1.1.1k, intermediate configuration +# https://ssl-config.mozilla.org/#server=nginx&version=1.17.7&config=intermediate&openssl=1.1.1k&guideline=5.6 +server { + # Listen on a custom port for the proxy protocol. + listen 8443 ssl http2 proxy_protocol; + # Make use of the ngx_http_realip_module to set the $remote_addr and + # $remote_port to the client address and client port, when using proxy + # protocol. + # First set our proxy protocol proxy as trusted. + set_real_ip_from 172.31.17.140; + # Then tell the realip_module to get the addresses from the proxy protocol + # header.
+ real_ip_header proxy_protocol; + + server_name eh22.easterhegg.eu; + + ssl_certificate /etc/letsencrypt/live/eh22.easterhegg.eu/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/eh22.easterhegg.eu/privkey.pem; + # verify chain of trust of OCSP response using Root CA and Intermediate certs + ssl_trusted_certificate /etc/letsencrypt/live/eh22.easterhegg.eu/chain.pem; + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + add_header Strict-Transport-Security "max-age=63072000" always; + + # Maximum file upload size is 20MB - change accordingly if needed + # See: https://www.dokuwiki.org/faq:uploadsize + client_max_body_size 20M; + client_body_buffer_size 128k; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + # This is https in any case. + proxy_set_header X-Forwarded-Proto https; + + root /var/www/dokuwiki; + index doku.php; + + #Remember to comment the below out when you're installing, and uncomment it when done. 
+ location ~ /(conf/|bin/|inc/|vendor/|install.php) { deny all; } + + #Support for X-Accel-Redirect + location ~ ^/data/ { internal ; } + + location ~ ^/lib.*\.(js|css|gif|png|ico|jpg|jpeg)$ { + expires 365d; + } + + location / { try_files $uri $uri/ @dokuwiki; } + + location @dokuwiki { + # rewrites "doku.php/" out of the URLs if you set the userewrite setting to .htaccess in dokuwiki config page + rewrite ^/_media/(.*) /lib/exe/fetch.php?media=$1 last; + rewrite ^/_detail/(.*) /lib/exe/detail.php?media=$1 last; + rewrite ^/_export/([^/]+)/(.*) /doku.php?do=export_$1&id=$2 last; + rewrite ^/(.*) /doku.php?id=$1&$args last; + } + + location ~ \.php$ { + try_files $uri $uri/ /doku.php; + include fastcgi_params; + fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name; + fastcgi_param REDIRECT_STATUS 200; + fastcgi_pass unix:/var/run/php/php-fpm-dokuwiki.sock; + } +} diff --git a/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf b/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf index 30eebc7..d5ae146 100644 --- a/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf +++ b/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf @@ -35,7 +35,7 @@ map $host $upstream_acme_challenge_host { eh11.easterhegg.eu 172.31.17.151:31820; eh20.easterhegg.eu 172.31.17.151:31820; www.eh20.easterhegg.eu 172.31.17.151:31820; - eh22.easterhegg.eu 172.31.17.159:31820; + eh22.easterhegg.eu 172.31.17.165:31820; easterheggxxxx.hamburg.ccc.de 172.31.17.151:31820; eh2003.hamburg.ccc.de 172.31.17.151:31820; www.eh2003.hamburg.ccc.de 172.31.17.151:31820; diff --git a/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf b/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf index b5f1d98..0529f4c 100644 --- a/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf +++ b/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf @@ -53,7 +53,7 @@ stream { eh11.easterhegg.eu 172.31.17.151:8443;
eh20.easterhegg.eu 172.31.17.151:8443; www.eh20.easterhegg.eu 172.31.17.151:8443; - eh22.easterhegg.eu 172.31.17.159:8443; + eh22.easterhegg.eu 172.31.17.165:8443; easterheggxxxx.hamburg.ccc.de 172.31.17.151:8443; eh2003.hamburg.ccc.de 172.31.17.151:8443; www.eh2003.hamburg.ccc.de 172.31.17.151:8443; From 70d4ce9a2d9069d68d6d87178d7972ca049c8d67 Mon Sep 17 00:00:00 2001 From: June Date: Wed, 12 Feb 2025 19:02:53 +0100 Subject: [PATCH 07/46] eh22-wiki: ensure base for CI deploy of styleguide under /design/ --- playbooks/deploy.yaml | 3 ++ playbooks/ensure_eh22_styleguide_dir.yaml | 40 +++++++++++++++++++ .../eh22-wiki/nginx/eh22.easterhegg.eu.conf | 9 +++++ 3 files changed, 52 insertions(+) create mode 100644 playbooks/ensure_eh22_styleguide_dir.yaml diff --git a/playbooks/deploy.yaml b/playbooks/deploy.yaml index 6955b02..da2937f 100644 --- a/playbooks/deploy.yaml +++ b/playbooks/deploy.yaml @@ -64,3 +64,6 @@ - "o=${distro_id},n=${distro_codename}" - "o=Docker,n=${distro_codename}" - "o=nginx,n=${distro_codename}" + +- name: Run ensure_eh22_styleguide_dir Playbook + ansible.builtin.import_playbook: ensure_eh22_styleguide_dir.yaml diff --git a/playbooks/ensure_eh22_styleguide_dir.yaml b/playbooks/ensure_eh22_styleguide_dir.yaml new file mode 100644 index 0000000..a57f49d --- /dev/null +++ b/playbooks/ensure_eh22_styleguide_dir.yaml @@ -0,0 +1,40 @@ +--- +# TODO: This should really be handled through a role at some point. +# This role is also needed for migrating public-web-static to Ansible. +- name: Ensure base for working EH22 Styleguide CI deployment + hosts: eh22-wiki + tasks: + - name: Ensure deployment user group + ansible.builtin.group: + name: eh22-styleguide-deploy + system: false + become: true + + - name: Ensure deployment user + ansible.builtin.user: + name: eh22-styleguide-deploy + group: eh22-styleguide-deploy + password: '!' 
+ system: false + become: true + + - name: Ensure SSH key is set for deployment user + ansible.posix.authorized_key: + user: eh22-styleguide-deploy + exclusive: true + key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOEgdXYZUq6SUDeKpX0Q8d1WYJ5WduHPMEaRuj0yfJTl deploy key for eh22 styleguide" + become: true + + - name: Ensure deployment directory + ansible.builtin.file: + path: /var/www/eh22-styleguide + state: directory + mode: "0755" + owner: eh22-styleguide-deploy + group: eh22-styleguide-deploy + become: true + + - name: Ensure rsync is present for deployment + ansible.builtin.apt: + name: rsync + become: true diff --git a/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf index 631ba7d..3ccbd2e 100644 --- a/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf +++ b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf @@ -63,4 +63,13 @@ server { fastcgi_param REDIRECT_STATUS 200; fastcgi_pass unix:/var/run/php/php-fpm-dokuwiki.sock; } + + location = /design { + return 302 https://eh22.easterhegg.eu/design/; + } + + location /design/ { + alias /var/www/eh22-styleguide/; + index index.html; + } } From 9334f70289a98a629a07693cfd2abc1af46b0195 Mon Sep 17 00:00:00 2001 From: June Date: Fri, 14 Feb 2025 04:34:09 +0100 Subject: [PATCH 08/46] eh22-wiki: make automatic dir redirects work for /design Do this by setting port_in_redirect to off. 
--- .../chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf index 3ccbd2e..5b7f07f 100644 --- a/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf +++ b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf @@ -64,11 +64,9 @@ server { fastcgi_pass unix:/var/run/php/php-fpm-dokuwiki.sock; } - location = /design { - return 302 https://eh22.easterhegg.eu/design/; - } - location /design/ { + # Disable port in redirect as NGINX would redirect to the PROXY Protocol port 8443 for locations like https://eh22.easterhegg.eu/design + port_in_redirect off; alias /var/www/eh22-styleguide/; index index.html; } From 1bae6234ae3267accd663cc4e8ff2d727b134545 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Fri, 14 Feb 2025 19:52:19 +0100 Subject: [PATCH 09/46] redirect to eh22 --- resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf b/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf index 90be686..f8a802f 100644 --- a/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf +++ b/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf @@ -38,7 +38,7 @@ server { location = / { #return 302 https://wiki.hamburg.ccc.de/infrastructure:service-overview#tickets_pretix; - return 302 https://tickets.hamburg.ccc.de/hackertours/38c3/; + return 302 https://tickets.hamburg.ccc.de/hackertours/eh22/; } location / { From ac7e8bb6f22e73e591abec4b2000f11291fa2e59 Mon Sep 17 00:00:00 2001 From: June Date: Sat, 15 Feb 2025 06:05:44 +0100 Subject: [PATCH 10/46] grafana: set dur. for Prom. hyperv. disk rw rate and hdd io aler. 
to 90m Set duration for Prometheus hypervisor disk rw rate and hard disk io alerts to 90m to account for the very long running (over an hour) backup job. --- .../grafana/docker_compose/prometheus_alerts.rules.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index 5cc54e9..f684385 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -166,7 +166,7 @@ groups: # Longer intervals to account for disk intensive hypervisor tasks (backups, moving VMs, etc.). - alert: HypervisorHostUnusualDiskReadRate expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 60m + for: 90m labels: severity: warning annotations: @@ -174,7 +174,7 @@ groups: description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}" - alert: HypervisorHostUnusualDiskWriteRate expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 60m + for: 90m labels: severity: warning annotations: @@ -256,7 +256,7 @@ groups: # Since hard disks on the hypervisor can easily have their IO saturated by hypervisor tasks (backups, moving VMs, etc.), alert when the IO is above the regular threshold for a very long time. 
- alert: HypervisorHostUnusualHardDiskIo expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 50m + for: 90m labels: severity: warning annotations: From 537ef55b6f0e8086e62d3fb8dde08d3bac17ee44 Mon Sep 17 00:00:00 2001 From: June Date: Sat, 15 Feb 2025 06:21:14 +0100 Subject: [PATCH 11/46] eh22-wiki: add missing redirect for /design Apparently this is still needed. --- .../chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf index 5b7f07f..d3ed959 100644 --- a/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf +++ b/resources/chaosknoten/eh22-wiki/nginx/eh22.easterhegg.eu.conf @@ -64,6 +64,12 @@ server { fastcgi_pass unix:/var/run/php/php-fpm-dokuwiki.sock; } + location = /design { + # Disable port in redirect as NGINX would redirect to the PROXY Protocol port 8443 for locations like https://eh22.easterhegg.eu/design + port_in_redirect off; + return 302 /design/; + } + location /design/ { # Disable port in redirect as NGINX would redirect to the PROXY Protocol port 8443 for locations like https://eh22.easterhegg.eu/design port_in_redirect off; From fc24bfff5ddb2acc32f92d59fb144381cd471155 Mon Sep 17 00:00:00 2001 From: June Date: Thu, 13 Feb 2025 02:00:32 +0100 Subject: [PATCH 12/46] add redis role for ensuring redis is installed from distro packages This is a requirement for a new netbox role. 
--- roles/redis/README.md | 15 +++++++++++++++ roles/redis/tasks/main.yaml | 5 +++++ 2 files changed, 20 insertions(+) create mode 100644 roles/redis/README.md create mode 100644 roles/redis/tasks/main.yaml diff --git a/roles/redis/README.md b/roles/redis/README.md new file mode 100644 index 0000000..dd30500 --- /dev/null +++ b/roles/redis/README.md @@ -0,0 +1,15 @@ +# Role `redis` + +Ensures `redis` is installed by installing the distribution's package. + +## Supported Distributions + +Should work on Debian-based distributions. + +## Required Arguments + +None. + +## Optional Arguments + +None. diff --git a/roles/redis/tasks/main.yaml b/roles/redis/tasks/main.yaml new file mode 100644 index 0000000..ad70e44 --- /dev/null +++ b/roles/redis/tasks/main.yaml @@ -0,0 +1,5 @@ +- name: Ensure redis is installed + ansible.builtin.apt: + name: + - redis + become: true From 96629953773060c5c60720f55bd3f096b0df7b29 Mon Sep 17 00:00:00 2001 From: June Date: Thu, 13 Feb 2025 03:24:15 +0100 Subject: [PATCH 13/46] add postgresql role for ens. psql and opt. some dbs and users are set up Add postgresql role for ensuring postgresql is installed. Furthermore the role optionally takes some basic configuration to ensure databases with their owners and users are set up as specified. This is a requirement for a new netbox role.
--- roles/postgresql/README.md | 37 +++++++++++++++++++++++ roles/postgresql/defaults/main.yaml | 2 ++ roles/postgresql/meta/argument_specs.yaml | 28 +++++++++++++++++ roles/postgresql/tasks/main.yaml | 28 +++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 roles/postgresql/README.md create mode 100644 roles/postgresql/defaults/main.yaml create mode 100644 roles/postgresql/meta/argument_specs.yaml create mode 100644 roles/postgresql/tasks/main.yaml diff --git a/roles/postgresql/README.md b/roles/postgresql/README.md new file mode 100644 index 0000000..6457931 --- /dev/null +++ b/roles/postgresql/README.md @@ -0,0 +1,37 @@ +# Role `postgresql` + +Ensures `postgresql` is installed by installing the distribution's package. +Also ensures the optionally given databases and users are set up as specified. + +## Supported Distributions + +Should work on Debian-based distributions. + +## Required Arguments + +None. + +## Optional Arguments + +- `postgresql__dbs`: List of databases with their owner to ensure are set up. +- `postgresql__dbs.*.name`: Name of the database. +- `postgresql__dbs.*.owner`: Owner of the database. +- `postgresql__users`: List of users to ensure are set up. +- `postgresql__users.*.name`: Name of the user. +- `postgresql__users.*.password`: Optional password for the user. + If left unset, the user will have no password set, but can still connect using [peer authentication](https://www.postgresql.org/docs/current/auth-peer.html) on the local system. + (Peer authentication works when a password is set as well.)
+ +## Example Arguments + +```yaml +postgresql__dbs: + - name: netbox + owner: netbox + - name: foo + owner: bar +postgresql__users: + - name: netbox + password: super_secret + - name: bar +``` diff --git a/roles/postgresql/defaults/main.yaml b/roles/postgresql/defaults/main.yaml new file mode 100644 index 0000000..21fcd46 --- /dev/null +++ b/roles/postgresql/defaults/main.yaml @@ -0,0 +1,2 @@ +postgresql__dbs: [ ] +postgresql__users: [ ] diff --git a/roles/postgresql/meta/argument_specs.yaml b/roles/postgresql/meta/argument_specs.yaml new file mode 100644 index 0000000..28e5813 --- /dev/null +++ b/roles/postgresql/meta/argument_specs.yaml @@ -0,0 +1,28 @@ +argument_specs: + main: + options: + postgresql__dbs: + type: list + elements: dict + required: false + default: [ ] + options: + name: + type: str + required: true + owner: + type: str + required: true + postgresql__users: + type: list + elements: dict + required: false + default: [ ] + options: + name: + type: str + required: true + password: + type: str + required: false + default: "" diff --git a/roles/postgresql/tasks/main.yaml b/roles/postgresql/tasks/main.yaml new file mode 100644 index 0000000..fc61857 --- /dev/null +++ b/roles/postgresql/tasks/main.yaml @@ -0,0 +1,28 @@ +- name: Ensure postgresql is installed + ansible.builtin.apt: + name: + - postgresql + become: true + +- name: Ensure Python library for community.postgresql is installed if needed + ansible.builtin.apt: + name: + - python3-psycopg + become: true + when: postgresql__dbs != [ ] or postgresql__users != [ ] + +- name: Ensure users + community.postgresql.postgresql_user: + name: "{{ item.name }}" + password: "{{ item.password | default('') }}" + become: true + become_user: postgres + loop: "{{ postgresql__users }}" + +- name: Ensure dbs with owners + community.postgresql.postgresql_db: + name: "{{ item.name }}" + owner: "{{ item.owner }}" + become: true + become_user: postgres + loop: "{{ postgresql__dbs }}" From 
783c36bcc1730505fbc9eadc22c822a4d23689f3 Mon Sep 17 00:00:00 2001 From: June Date: Fri, 14 Feb 2025 21:43:44 +0100 Subject: [PATCH 14/46] add netbox role for ensuring netbox is deployed as specified The role takes over the deployment of netbox and its dependencies, while still requiring the user to provide the netbox version, db password and config as well as to set up a web server and handle stuff like creating users, etc. --- roles/netbox/README.md | 77 +++++++++++++++++++ roles/netbox/handlers/main.yaml | 24 ++++++ roles/netbox/meta/argument_specs.yaml | 12 +++ roles/netbox/meta/main.yaml | 11 +++ roles/netbox/tasks/main.yaml | 103 ++++++++++++++++++++++++++ 5 files changed, 227 insertions(+) create mode 100644 roles/netbox/README.md create mode 100644 roles/netbox/handlers/main.yaml create mode 100644 roles/netbox/meta/argument_specs.yaml create mode 100644 roles/netbox/meta/main.yaml create mode 100644 roles/netbox/tasks/main.yaml diff --git a/roles/netbox/README.md b/roles/netbox/README.md new file mode 100644 index 0000000..38b7968 --- /dev/null +++ b/roles/netbox/README.md @@ -0,0 +1,77 @@ +# `netbox` role + +A role for setting up NetBox. +It automatically pulls in all required dependencies like Redis and PostgreSQL, deploys the provided systemd services and gunicorn config and sets up a PostgreSQL database named `netbox` with an owner named `netbox` and the specified password. +However providing the [NetBox configuration](#netbox-configuration), [setting up a web server like nginx to proxy to gunicorn](#web-server-setup) and tasks like creating users, etc. you have to do yourself. + +## Supported Distributions + +Should work on Debian-based distributions. + +## Required Arguments + +- `netbox__version`: The NetBox version to deploy. +- `netbox__db_password`: The password to use for connection to the database. + This is required since the upgrade script runs as root and therefore peer authentication doesn't work. 
+- `netbox__config`: The NetBox config to deploy. + See [NetBox Configuration](#netbox-configuration) for more infos. + +## Optional Arguments + +None. + +## NetBox Configuration + +The NetBox configuration should include a connection to Redis as well as a connection to PostgreSQL. +Configuration for the Redis connection: + +```python +REDIS = { + "tasks": { + "HOST": "localhost", + "PORT": 6379, + "USERNAME": "", + "PASSWORD": "", + "DATABASE": 0, + "SSL": False, + }, + "caching": { + "HOST": "localhost", + "PORT": 6379, + "USERNAME": "", + "PASSWORD": "", + "DATABASE": 1, + "SSL": False, + }, +} +``` + +Configuration for the PostgreSQL connection: + +```python +DATABASE = { + "HOST": "localhost", + "NAME": "netbox", + "USER": "netbox", + "PASSWORD": "", +} +``` + +Further configuration should take place. Some relevant resources can be found here: + +- Installation guide configuration docs: +- Configuration docs: +- Example configuration: + +## Web Server Setup + +As this role just sets up gunicorn, but doesn't set up a web server, you need to do that yourself. +The relevant documentation on how to do that can be found here: + +- Web server setup docs: +- Example base nginx config: + +## Links & Resources + +- The NetBox Git Repo: +- The NetBox installation docs: diff --git a/roles/netbox/handlers/main.yaml b/roles/netbox/handlers/main.yaml new file mode 100644 index 0000000..fd7eb62 --- /dev/null +++ b/roles/netbox/handlers/main.yaml @@ -0,0 +1,24 @@ +- name: Run upgrade script + ansible.builtin.command: /opt/netbox/upgrade.sh + become: true + # When it runs, this should always report changed. 
+ changed_when: true + +- name: Ensure netbox systemd services are set up and up-to-date + ansible.builtin.systemd_service: + daemon_reload: true + name: "{{ item }}" + enabled: true + state: restarted + become: true + loop: + - "netbox.service" + - "netbox-rq.service" + +- name: Ensure netbox housekeeping timer is set up and up-to-date + ansible.builtin.systemd_service: + daemon_reload: true + name: "netbox-housekeeping.timer" + enabled: true + state: restarted + become: true diff --git a/roles/netbox/meta/argument_specs.yaml b/roles/netbox/meta/argument_specs.yaml new file mode 100644 index 0000000..f836b43 --- /dev/null +++ b/roles/netbox/meta/argument_specs.yaml @@ -0,0 +1,12 @@ +argument_specs: + main: + options: + netbox__version: + type: str + required: true + netbox__db_password: + type: str + required: true + netbox__config: + type: str + required: true diff --git a/roles/netbox/meta/main.yaml b/roles/netbox/meta/main.yaml new file mode 100644 index 0000000..79c845d --- /dev/null +++ b/roles/netbox/meta/main.yaml @@ -0,0 +1,11 @@ +--- +dependencies: + - role: redis + - role: postgresql + vars: + postgresql__dbs: + - name: netbox + owner: netbox + postgresql__users: + - name: netbox + password: "{{ netbox__db_password }}" diff --git a/roles/netbox/tasks/main.yaml b/roles/netbox/tasks/main.yaml new file mode 100644 index 0000000..89bf4a6 --- /dev/null +++ b/roles/netbox/tasks/main.yaml @@ -0,0 +1,103 @@ +- name: Ensure all dependencies are installed + ansible.builtin.apt: + name: + - python3 + - python3-pip + - python3-venv + - python3-dev + - build-essential + - libxml2-dev + - libxslt1-dev + - libffi-dev + - libpq-dev + - libssl-dev + - zlib1g-dev + - git + become: true + +- name: Ensure NetBox source is present + ansible.builtin.git: + repo: https://github.com/netbox-community/netbox.git + dest: /opt/netbox/ + version: "{{ netbox__version }}" + become: true + notify: + - Run upgrade script + - Ensure netbox systemd services are set up and up-to-date + +- 
name: Ensure netbox user + block: + - name: Ensure netbox group exists + ansible.builtin.group: + name: netbox + system: true + become: true + + - name: Ensure netbox user exists + ansible.builtin.user: + name: netbox + group: netbox + password: '!' + system: true + become: true + +- name: Ensure relevant directories are owned by netbox user + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: netbox + recurse: true + become: true + loop: + - "/opt/netbox/netbox/media/" + - "/opt/netbox/netbox/reports/" + - "/opt/netbox/netbox/scripts/" + +- name: Deploy configuration.py + ansible.builtin.copy: + content: "{{ netbox__config }}" + dest: "/opt/netbox/netbox/netbox/configuration.py" + mode: "0644" + owner: root + group: root + become: true + notify: Ensure netbox systemd services are set up and up-to-date + +- name: Ensure provided gunicorn config is copied + ansible.builtin.copy: + remote_src: true + src: "/opt/netbox/contrib/gunicorn.py" + dest: "/opt/netbox/gunicorn.py" + mode: "0644" + owner: root + group: root + become: true + notify: Ensure netbox systemd services are set up and up-to-date + +- name: Ensure provided netbox systemd service files are copied + ansible.builtin.copy: + remote_src: true + src: "/opt/netbox/contrib/{{ item }}" + dest: "/etc/systemd/system/{{ item }}" + mode: "0644" + owner: root + group: root + become: true + loop: + - "netbox.service" + - "netbox-rq.service" + notify: Ensure netbox systemd services are set up and up-to-date + +- name: Ensure provided housekeeping systemd service and timer are copied + ansible.builtin.copy: + remote_src: true + src: "/opt/netbox/contrib/{{ item }}" + dest: "/etc/systemd/system/{{ item }}" + mode: "0644" + owner: root + group: root + become: true + loop: + - "netbox-housekeeping.service" + - "netbox-housekeeping.timer" + notify: Ensure netbox housekeeping timer is set up and up-to-date From dbb784e2bbafdd4983c00fffd379c2e46a66b8f6 Mon Sep 17 00:00:00 2001 From: June Date: Sat, 15 Feb 
2025 03:52:02 +0100 Subject: [PATCH 15/46] add license notice to README This is in preparation for extending that notice to point out code that is licensed differently. --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 68c92b4..ab14d1d 100644 --- a/README.md +++ b/README.md @@ -45,3 +45,7 @@ Im Ansible-Repo müssen diese Sachen hinzugefügt werden: * Individuelle Config für den Service. Wenn Docker Compose, hier weiterleiten auf den eigentlichen Dienst in Compose. * Cert-Dateinamen anpassen * `resources/chaosknoten/`*host*`/docker_compose/compose.yaml.j2`: Config für Docker Compose (wenn verwendet) + +## License + +This CCCHH ansible-ccchh repository is licensed under the [MIT License](./LICENSE). From 09a8551c8ae56a10b12752f1a2cacd0309fc5383 Mon Sep 17 00:00:00 2001 From: June Date: Sat, 15 Feb 2025 05:22:21 +0100 Subject: [PATCH 16/46] add option to netbox role for custom pipeline code for OIDC mapping Add option to netbox role for ensuring custom pipeline code for OIDC group and role mapping is either present or not. The custom pipeline code is licensed under the Creative Commons: CC BY-SA 4.0 license. 
See: https://github.com/goauthentik/authentik/blob/main/LICENSE https://github.com/goauthentik/authentik/blob/main/website/integrations/services/netbox/index.md https://docs.goauthentik.io/integrations/services/netbox/ https://git.hamburg.ccc.de/CCCHH/nix-infra/commit/5676b1a4680dbe706686f38902f5607ec33330ff --- README.md | 3 +- roles/netbox/README.md | 13 ++++- roles/netbox/defaults/main.yaml | 1 + ...om_pipeline_oidc_group_and_role_mapping.py | 55 +++++++++++++++++++ roles/netbox/meta/argument_specs.yaml | 4 ++ roles/netbox/tasks/main.yaml | 21 +++++++ 6 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 roles/netbox/defaults/main.yaml create mode 100644 roles/netbox/files/custom_pipeline_oidc_group_and_role_mapping.py diff --git a/README.md b/README.md index ab14d1d..6906a7f 100644 --- a/README.md +++ b/README.md @@ -48,4 +48,5 @@ Im Ansible-Repo müssen diese Sachen hinzugefügt werden: ## License -This CCCHH ansible-ccchh repository is licensed under the [MIT License](./LICENSE). +This CCCHH ansible-ccchh repository is licensed under the [MIT License](./LICENSE). +[`custom_pipeline_oidc_group_and_role_mapping.py`](./roles/netbox/files/custom_pipeline_oidc_group_and_role_mapping.py) is licensed under the Creative Commons: CC BY-SA 4.0 license. diff --git a/roles/netbox/README.md b/roles/netbox/README.md index 38b7968..594e8f8 100644 --- a/roles/netbox/README.md +++ b/roles/netbox/README.md @@ -18,7 +18,9 @@ Should work on Debian-based distributions. ## Optional Arguments -None. +- `netbox__custom_pipeline_oidc_group_and_role_mapping`: Whether or not to have custom pipeline code for OIDC group and role mapping present. + See [Custom Pipeline Code for OIDC Group and Role Mapping](#custom-pipeline-code-for-oidc-group-and-role-mapping) for more infos. + Defaults to `false`. 
## NetBox Configuration @@ -71,6 +73,15 @@ The relevant documentation on how to do that can be found here: - Web server setup docs: - Example base nginx config: +## Custom Pipeline Code for OIDC Group and Role Mapping + +Setting the option `netbox__custom_pipeline_oidc_group_and_role_mapping` to `true` makes this role ensure custom pipeline code for OIDC group and role mapping is present. +Note that this role uses code for NetBox >= 4.0.0. +The code is available in `files/custom_pipeline_oidc_group_and_role_mapping.py`, licensed under the CC BY-SA 4.0 license and taken from [this authentik NetBox documentation](https://docs.goauthentik.io/integrations/services/netbox/). +The documentation also shows how to use the pipeline code by defining a custom `SOCIAL_AUTH_PIPELINE`, which you also need to do, as the configuration isn't provided by this role. +However instead of under `netbox.custom_pipeline.` the functions are available under `netbox.custom_pipeline_oidc_mapping.` with this role. +See also [the default settings.py](https://github.com/netbox-community/netbox/blob/main/netbox/netbox/settings.py) for the default `SOCIAL_AUTH_PIPELINE`. + ## Links & Resources - The NetBox Git Repo: diff --git a/roles/netbox/defaults/main.yaml b/roles/netbox/defaults/main.yaml new file mode 100644 index 0000000..49b518e --- /dev/null +++ b/roles/netbox/defaults/main.yaml @@ -0,0 +1 @@ +netbox__custom_pipeline_oidc_group_and_role_mapping: false diff --git a/roles/netbox/files/custom_pipeline_oidc_group_and_role_mapping.py b/roles/netbox/files/custom_pipeline_oidc_group_and_role_mapping.py new file mode 100644 index 0000000..470f388 --- /dev/null +++ b/roles/netbox/files/custom_pipeline_oidc_group_and_role_mapping.py @@ -0,0 +1,55 @@ +# Licensed under Creative Commons: CC BY-SA 4.0 license. 
+# https://github.com/goauthentik/authentik/blob/main/LICENSE
+# https://github.com/goauthentik/authentik/blob/main/website/integrations/services/netbox/index.md
+# https://docs.goauthentik.io/integrations/services/netbox/
+# Modified from upstream: a token without a 'groups' claim is handled instead of raising UnboundLocalError.
+from netbox.authentication import Group
+
+class AuthFailed(Exception):
+    pass
+
+def add_groups(response, user, backend, *args, **kwargs):
+    """Pipeline step: add the user to every group named in the token's 'groups' claim."""
+    try:
+        groups = response['groups']
+    except KeyError:
+        # No 'groups' claim in the token: nothing to add.
+        return
+
+    # Add all groups from oAuth token, creating groups which do not exist yet
+    for group_name in groups:
+        group_obj, _ = Group.objects.get_or_create(name=group_name)
+        user.groups.add(group_obj)
+
+def remove_groups(response, user, backend, *args, **kwargs):
+    """Pipeline step: remove the user from every group not named in the 'groups' claim."""
+    try:
+        groups = response['groups']
+    except KeyError:
+        # Remove all groups if no groups in oAuth token
+        user.groups.clear()
+        return
+
+    # Get all groups of user
+    user_groups = [item.name for item in user.groups.all()]
+    # Get groups of user which are not part of oAuth token
+    delete_groups = list(set(user_groups) - set(groups))
+
+    # Delete non oAuth token groups
+    for delete_group in delete_groups:
+        group = Group.objects.get(name=delete_group)
+        user.groups.remove(group)
+
+
+def set_roles(response, user, backend, *args, **kwargs):
+    """Pipeline step: set is_superuser/is_staff from the 'superusers'/'staff' groups and save."""
+    try:
+        groups = response['groups']
+    except KeyError:
+        # When no groups are set, save the user without roles
+        groups = []
+
+    # Set roles if role (superuser or staff) is in groups
+    user.is_superuser = 'superusers' in groups
+    user.is_staff = 'staff' in groups
+    user.save()
diff --git a/roles/netbox/meta/argument_specs.yaml b/roles/netbox/meta/argument_specs.yaml
index f836b43..0506389 100644
--- a/roles/netbox/meta/argument_specs.yaml
+++ b/roles/netbox/meta/argument_specs.yaml
@@ -10,3 +10,7 @@ argument_specs:
       netbox__config:
         type: str
         required: true
+      netbox__custom_pipeline_oidc_group_and_role_mapping:
+        type: bool
+        required: false
+        default: false
diff --git
a/roles/netbox/tasks/main.yaml b/roles/netbox/tasks/main.yaml index 89bf4a6..dffa746 100644 --- a/roles/netbox/tasks/main.yaml +++ b/roles/netbox/tasks/main.yaml @@ -25,6 +25,27 @@ - Run upgrade script - Ensure netbox systemd services are set up and up-to-date +- name: Ensures custom pipeline code for OIDC group and role mapping is present + ansible.builtin.copy: + src: custom_pipeline_oidc_group_and_role_mapping.py + dest: /opt/netbox/netbox/netbox/custom_pipeline_oidc_mapping.py + mode: "0644" + owner: root + group: root + when: netbox__custom_pipeline_oidc_group_and_role_mapping + become: true + notify: + - Ensure netbox systemd services are set up and up-to-date + +- name: Ensures custom pipeline code for OIDC group and role mapping is not present + ansible.builtin.file: + path: /opt/netbox/netbox/netbox/custom_pipeline_oidc_mapping.py + state: absent + when: not netbox__custom_pipeline_oidc_group_and_role_mapping + become: true + notify: + - Ensure netbox systemd services are set up and up-to-date + - name: Ensure netbox user block: - name: Ensure netbox group exists From 2ec1471d7fb1483bf1753e93b3971ce5a2df583c Mon Sep 17 00:00:00 2001 From: June Date: Sat, 15 Feb 2025 19:57:15 +0100 Subject: [PATCH 17/46] netbox: move NetBox from NixOS to Ansible Also introduce netbox_hosts group for applying netbox role to multiple hosts. 
--- inventories/chaosknoten/host_vars/netbox.yaml | 16 +++++ inventories/chaosknoten/hosts.yaml | 12 ++++ playbooks/deploy.yaml | 5 ++ .../netbox/netbox/configuration.py.j2 | 60 +++++++++++++++++++ .../netbox/nginx/netbox.hamburg.ccc.de.conf | 48 +++++++++++++++ .../nginx/acme_challenge.conf | 2 +- .../public-reverse-proxy/nginx/nginx.conf | 2 +- 7 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 inventories/chaosknoten/host_vars/netbox.yaml create mode 100644 resources/chaosknoten/netbox/netbox/configuration.py.j2 create mode 100644 resources/chaosknoten/netbox/nginx/netbox.hamburg.ccc.de.conf diff --git a/inventories/chaosknoten/host_vars/netbox.yaml b/inventories/chaosknoten/host_vars/netbox.yaml new file mode 100644 index 0000000..2304112 --- /dev/null +++ b/inventories/chaosknoten/host_vars/netbox.yaml @@ -0,0 +1,16 @@ +netbox__version: "v4.1.7" +netbox__db_password: "{{ lookup('community.general.passwordstore', 'noc/vm-secrets/chaosknoten/netbox/DATABASE_PASSWORD', create=false, missing='error') }}" +netbox__config: "{{ lookup('ansible.builtin.template', 'resources/chaosknoten/netbox/netbox/configuration.py.j2') }}" +netbox__custom_pipeline_oidc_group_and_role_mapping: true + +nginx__version_spec: "" +nginx__configurations: + - name: netbox.hamburg.ccc.de + content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/netbox/nginx/netbox.hamburg.ccc.de.conf') }}" + +certbot__version_spec: "" +certbot__acme_account_email_address: j+letsencrypt-ccchh@jsts.xyz +certbot__certificate_domains: + - "netbox.hamburg.ccc.de" +certbot__new_cert_commands: + - "systemctl reload nginx.service" diff --git a/inventories/chaosknoten/hosts.yaml b/inventories/chaosknoten/hosts.yaml index 911a87d..0f10bea 100644 --- a/inventories/chaosknoten/hosts.yaml +++ b/inventories/chaosknoten/hosts.yaml @@ -32,6 +32,10 @@ all: mumble: ansible_host: mumble.hamburg.ccc.de ansible_user: chaos + netbox: + ansible_host: netbox-intern.hamburg.ccc.de + ansible_user: chaos 
+ ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de onlyoffice: ansible_host: onlyoffice-intern.hamburg.ccc.de ansible_user: chaos @@ -64,6 +68,7 @@ all: keycloak: lists: mumble: + netbox: onlyoffice: pad: pretalx: @@ -94,6 +99,7 @@ all: keycloak: lists: mumble: + netbox: onlyoffice: pad: pretalx: @@ -112,6 +118,7 @@ all: keycloak: lists: mumble: + netbox: onlyoffice: pad: pretalx: @@ -123,6 +130,7 @@ all: eh22-wiki: tickets: keycloak: + netbox: onlyoffice: pad: pretalx: @@ -136,6 +144,7 @@ all: tickets: cloud: keycloak: + netbox: onlyoffice: pad: pretalx: @@ -146,3 +155,6 @@ all: hosts: eh22-wiki: wiki: + netbox_hosts: + hosts: + netbox: diff --git a/playbooks/deploy.yaml b/playbooks/deploy.yaml index da2937f..66f03de 100644 --- a/playbooks/deploy.yaml +++ b/playbooks/deploy.yaml @@ -29,6 +29,11 @@ roles: - dokuwiki +- name: Ensure NetBox deployment on netbox_hosts + hosts: netbox_hosts + roles: + - netbox + - name: Ensure NGINX deployment on nginx_hosts, which are also public_reverse_proxy_hosts, before certbot role runs hosts: nginx_hosts:&public_reverse_proxy_hosts roles: diff --git a/resources/chaosknoten/netbox/netbox/configuration.py.j2 b/resources/chaosknoten/netbox/netbox/configuration.py.j2 new file mode 100644 index 0000000..789a539 --- /dev/null +++ b/resources/chaosknoten/netbox/netbox/configuration.py.j2 @@ -0,0 +1,60 @@ +ALLOWED_HOSTS = [ "netbox.hamburg.ccc.de" ] +DATABASE = { + "HOST": "localhost", + "NAME": "netbox", + "USER": "netbox", + "PASSWORD": "{{ lookup('community.general.passwordstore', 'noc/vm-secrets/chaosknoten/netbox/DATABASE_PASSWORD', create=false, missing='error') }}", +} +REDIS = { + "tasks": { + "HOST": "localhost", + "PORT": 6379, + "USERNAME": "", + "PASSWORD": "", + "DATABASE": 0, + "SSL": False, + }, + "caching": { + "HOST": "localhost", + "PORT": 6379, + "USERNAME": "", + "PASSWORD": "", + "DATABASE": 1, + "SSL": False, + }, +} +SECRET_KEY = "{{ lookup('community.general.passwordstore', 
'noc/vm-secrets/chaosknoten/netbox/SECRET_KEY', create=false, missing='error') }}" +SESSION_COOKIE_SECURE = True + +# CCCHH ID (Keycloak) integration. +# https://github.com/python-social-auth/social-core/blob/0925304a9e437f8b729862687d3a808c7fb88a95/social_core/backends/keycloak.py#L7 +# https://python-social-auth.readthedocs.io/en/latest/backends/keycloak.html +REMOTE_AUTH_BACKEND = "social_core.backends.keycloak.KeycloakOAuth2" +SOCIAL_AUTH_KEYCLOAK_ACCESS_TOKEN_URL = ( + "https://id.hamburg.ccc.de/realms/ccchh/protocol/openid-connect/token" +) +SOCIAL_AUTH_KEYCLOAK_AUTHORIZATION_URL = ( + "https://id.hamburg.ccc.de/realms/ccchh/protocol/openid-connect/auth" +) +SOCIAL_AUTH_KEYCLOAK_KEY = "netbox" +SOCIAL_AUTH_KEYCLOAK_PUBLIC_KEY = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAi/Shi+b2OyYNGVFPsa6qf9SesEpRl5U5rpwgmt8H7NawMvwpPUYVW9o46QW0ulYcDmysT3BzpP3tagO/SFNoOjZdYe0D9nJ7vEp8KHbzR09KCfkyQIi0wLssKnDotVHL5JeUY+iKk+gjiwF9FSFSHPBqsST7hXVAut9LkOvs2aDod9AzbTH/uYbt4wfUm5l/1Ii8D+K7YcsFGUIqxv4XS/ylKqObqN4M2dac69iIwapoh6reaBQEm66vrOzJ+3yi4DZuPrkShJqi2hddtoyZihyCkF+eJJKEI5LrBf1KZB3Ec2YUrqk93ZGUGs/XY6R87QSfR3hJ82B1wnF+c2pw+QIDAQAB" +SOCIAL_AUTH_KEYCLOAK_SECRET = "{{ lookup('community.general.passwordstore', 'noc/vm-secrets/chaosknoten/netbox/SOCIAL_AUTH_KEYCLOAK_SECRET', create=false, missing='error') }}" +# Use custom OIDC group and role mapping pipeline functions added in via +# netbox__custom_pipeline_oidc_group_and_role_mapping. 
+# The default pipeline this is based on can be found here: +# https://github.com/netbox-community/netbox/blob/main/netbox/netbox/settings.py +SOCIAL_AUTH_PIPELINE = [ + "social_core.pipeline.social_auth.social_details", + "social_core.pipeline.social_auth.social_uid", + "social_core.pipeline.social_auth.social_user", + "social_core.pipeline.user.get_username", + "social_core.pipeline.user.create_user", + "social_core.pipeline.social_auth.associate_user", + "netbox.authentication.user_default_groups_handler", + "social_core.pipeline.social_auth.load_extra_data", + "social_core.pipeline.user.user_details", + # Custom OIDC group and role mapping functions. + "netbox.custom_pipeline_oidc_mapping.add_groups", + "netbox.custom_pipeline_oidc_mapping.remove_groups", + "netbox.custom_pipeline_oidc_mapping.set_roles", +] diff --git a/resources/chaosknoten/netbox/nginx/netbox.hamburg.ccc.de.conf b/resources/chaosknoten/netbox/nginx/netbox.hamburg.ccc.de.conf new file mode 100644 index 0000000..5550686 --- /dev/null +++ b/resources/chaosknoten/netbox/nginx/netbox.hamburg.ccc.de.conf @@ -0,0 +1,48 @@ +# partly generated 2022-01-08, Mozilla Guideline v5.6, nginx 1.17.7, OpenSSL 1.1.1k, intermediate configuration +# https://ssl-config.mozilla.org/#server=nginx&version=1.17.7&config=intermediate&openssl=1.1.1k&guideline=5.6 +server { + # Listen on a custom port for the proxy protocol. + listen 8443 ssl http2 proxy_protocol; + # Make use of the ngx_http_realip_module to set the $remote_addr and + # $remote_port to the client address and client port, when using proxy + # protocol. + # First set our proxy protocol proxy as trusted. + set_real_ip_from 172.31.17.140; + # Then tell the realip_module to get the addresses from the proxy protocol + # header.
+ real_ip_header proxy_protocol; + + server_name netbox.hamburg.ccc.de; + + ssl_certificate /etc/letsencrypt/live/netbox.hamburg.ccc.de/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/netbox.hamburg.ccc.de/privkey.pem; + # verify chain of trust of OCSP response using Root CA and Intermediate certs + ssl_trusted_certificate /etc/letsencrypt/live/netbox.hamburg.ccc.de/chain.pem; + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + add_header Strict-Transport-Security "max-age=63072000" always; + + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Port 443; + # This is https in any case. + proxy_set_header X-Forwarded-Proto https; + # Hide the X-Forwarded header. + proxy_hide_header X-Forwarded; + # Assume we are the only Reverse Proxy (well using Proxy Protocol, but that + # is transparent). + # Also provide "_hidden" for by, since it's not relevant. 
+ proxy_set_header Forwarded "for=$remote_addr;proto=https;host=$host;by=_hidden"; + + client_max_body_size 25m; + + location /static/ { + alias /opt/netbox/netbox/static/; + } + + location / { + proxy_pass http://127.0.0.1:8001; + } +} diff --git a/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf b/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf index d5ae146..c3f9fed 100644 --- a/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf +++ b/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf @@ -17,7 +17,7 @@ map $host $upstream_acme_challenge_host { invite.hamburg.ccc.de 172.31.17.144:31820; keycloak-admin.hamburg.ccc.de 172.31.17.144:31820; matrix.hamburg.ccc.de 172.31.17.150:31820; - netbox.hamburg.ccc.de 172.31.17.149:31820; + netbox.hamburg.ccc.de 172.31.17.167:31820; onlyoffice.hamburg.ccc.de 172.31.17.147:31820; pad.hamburg.ccc.de 172.31.17.141:31820; pretalx.hamburg.ccc.de 172.31.17.157:31820; diff --git a/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf b/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf index 0529f4c..dfcf8d2 100644 --- a/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf +++ b/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf @@ -32,7 +32,7 @@ stream { onlyoffice.hamburg.ccc.de 172.31.17.147:8443; hackertours.hamburg.ccc.de 172.31.17.151:8443; staging.hackertours.hamburg.ccc.de 172.31.17.151:8443; - netbox.hamburg.ccc.de 172.31.17.149:8443; + netbox.hamburg.ccc.de 172.31.17.167:8443; matrix.hamburg.ccc.de 172.31.17.150:8443; element.hamburg.ccc.de 172.31.17.151:8443; branding-resources.hamburg.ccc.de 172.31.17.151:8443; From 4def1334d8b5c42338c022605d0ccad80396f19a Mon Sep 17 00:00:00 2001 From: June Date: Sat, 15 Feb 2025 20:06:55 +0100 Subject: [PATCH 18/46] for non-verbose output hide user passwords in postgresql role --- roles/postgresql/tasks/main.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/roles/postgresql/tasks/main.yaml b/roles/postgresql/tasks/main.yaml index fc61857..8f89018 100644 --- a/roles/postgresql/tasks/main.yaml +++ b/roles/postgresql/tasks/main.yaml @@ -18,6 +18,8 @@ become: true become_user: postgres loop: "{{ postgresql__users }}" + loop_control: + label: "user {{ item.name }} with {{ 'a password' if item.password is defined else 'no password' }}" - name: Ensure dbs with owners community.postgresql.postgresql_db: From 61cd5053d097b2b729a575d12279496d56d70da2 Mon Sep 17 00:00:00 2001 From: June Date: Sun, 16 Feb 2025 00:36:10 +0100 Subject: [PATCH 19/46] flatten inventories making them simpler Remove the child groups as we weren't using their functionality anyway. Also remove the debian_11/12 host groups as they're not in use. --- inventories/chaosknoten/hosts.yaml | 316 ++++++++++++++--------------- inventories/z9/hosts.yaml | 44 ++-- 2 files changed, 177 insertions(+), 183 deletions(-) diff --git a/inventories/chaosknoten/hosts.yaml b/inventories/chaosknoten/hosts.yaml index 0f10bea..97ef4bb 100644 --- a/inventories/chaosknoten/hosts.yaml +++ b/inventories/chaosknoten/hosts.yaml @@ -1,160 +1,158 @@ all: - children: - debian_12: - hosts: - ccchoir: - ansible_host: ccchoir-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - cloud: - ansible_host: cloud-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - eh22-wiki: - ansible_host: eh22-wiki-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - grafana: - ansible_host: grafana-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - tickets: - ansible_host: tickets-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - keycloak: - 
ansible_host: keycloak-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - lists: - ansible_host: lists.hamburg.ccc.de - ansible_user: chaos - mumble: - ansible_host: mumble.hamburg.ccc.de - ansible_user: chaos - netbox: - ansible_host: netbox-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - onlyoffice: - ansible_host: onlyoffice-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - pad: - ansible_host: pad-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - pretalx: - ansible_host: pretalx-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - public-reverse-proxy: - ansible_host: public-reverse-proxy.hamburg.ccc.de - ansible_user: chaos - wiki: - ansible_host: wiki-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - zammad: - ansible_host: zammad-intern.hamburg.ccc.de - ansible_user: chaos - ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de - base_config_hosts: - hosts: - ccchoir: - cloud: - eh22-wiki: - grafana: - keycloak: - lists: - mumble: - netbox: - onlyoffice: - pad: - pretalx: - public-reverse-proxy: - tickets: - wiki: - zammad: - docker_compose_hosts: - hosts: - ccchoir: - grafana: - tickets: - keycloak: - lists: - onlyoffice: - pad: - pretalx: - zammad: - nextcloud_hosts: - hosts: - cloud: - nginx_hosts: - hosts: - ccchoir: - eh22-wiki: - grafana: - tickets: - keycloak: - lists: - mumble: - netbox: - onlyoffice: - pad: - pretalx: - public-reverse-proxy: - wiki: - zammad: - public_reverse_proxy_hosts: - hosts: - public-reverse-proxy: - certbot_hosts: - hosts: - ccchoir: - eh22-wiki: - grafana: - tickets: - 
keycloak: - lists: - mumble: - netbox: - onlyoffice: - pad: - pretalx: - wiki: - zammad: - prometheus_node_exporter_hosts: - hosts: - ccchoir: - eh22-wiki: - tickets: - keycloak: - netbox: - onlyoffice: - pad: - pretalx: - wiki: - zammad: - infrastructure_authorized_keys_hosts: - hosts: - ccchoir: - eh22-wiki: - grafana: - tickets: - cloud: - keycloak: - netbox: - onlyoffice: - pad: - pretalx: - public-reverse-proxy: - wiki: - zammad: - wiki_hosts: - hosts: - eh22-wiki: - wiki: - netbox_hosts: - hosts: - netbox: + hosts: + ccchoir: + ansible_host: ccchoir-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + cloud: + ansible_host: cloud-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + eh22-wiki: + ansible_host: eh22-wiki-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + grafana: + ansible_host: grafana-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + tickets: + ansible_host: tickets-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + keycloak: + ansible_host: keycloak-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + lists: + ansible_host: lists.hamburg.ccc.de + ansible_user: chaos + mumble: + ansible_host: mumble.hamburg.ccc.de + ansible_user: chaos + netbox: + ansible_host: netbox-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + onlyoffice: + ansible_host: onlyoffice-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + pad: + ansible_host: pad-intern.hamburg.ccc.de + ansible_user: chaos + 
ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + pretalx: + ansible_host: pretalx-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + public-reverse-proxy: + ansible_host: public-reverse-proxy.hamburg.ccc.de + ansible_user: chaos + wiki: + ansible_host: wiki-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + zammad: + ansible_host: zammad-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de +base_config_hosts: + hosts: + ccchoir: + cloud: + eh22-wiki: + grafana: + keycloak: + lists: + mumble: + netbox: + onlyoffice: + pad: + pretalx: + public-reverse-proxy: + tickets: + wiki: + zammad: +docker_compose_hosts: + hosts: + ccchoir: + grafana: + tickets: + keycloak: + lists: + onlyoffice: + pad: + pretalx: + zammad: +nextcloud_hosts: + hosts: + cloud: +nginx_hosts: + hosts: + ccchoir: + eh22-wiki: + grafana: + tickets: + keycloak: + lists: + mumble: + netbox: + onlyoffice: + pad: + pretalx: + public-reverse-proxy: + wiki: + zammad: +public_reverse_proxy_hosts: + hosts: + public-reverse-proxy: +certbot_hosts: + hosts: + ccchoir: + eh22-wiki: + grafana: + tickets: + keycloak: + lists: + mumble: + netbox: + onlyoffice: + pad: + pretalx: + wiki: + zammad: +prometheus_node_exporter_hosts: + hosts: + ccchoir: + eh22-wiki: + tickets: + keycloak: + netbox: + onlyoffice: + pad: + pretalx: + wiki: + zammad: +infrastructure_authorized_keys_hosts: + hosts: + ccchoir: + eh22-wiki: + grafana: + tickets: + cloud: + keycloak: + netbox: + onlyoffice: + pad: + pretalx: + public-reverse-proxy: + wiki: + zammad: +wiki_hosts: + hosts: + eh22-wiki: + wiki: +netbox_hosts: + hosts: + netbox: diff --git a/inventories/z9/hosts.yaml b/inventories/z9/hosts.yaml index f8a452d..0dde922 100644 --- a/inventories/z9/hosts.yaml +++ b/inventories/z9/hosts.yaml @@ 
-1,25 +1,21 @@ all: - children: - debian_11: - hosts: - light: - ansible_host: light.z9.ccchh.net - ansible_user: chaos - authoritative-dns: - ansible_host: authoritative-dns.z9.ccchh.net - ansible_user: chaos - debian_12: - hosts: - nginx_hosts: - hosts: - light: - ola_hosts: - hosts: - light: - foobazdmx_hosts: - hosts: - light: - infrastructure_authorized_keys_hosts: - hosts: - light: - authoritative-dns: + hosts: + light: + ansible_host: light.z9.ccchh.net + ansible_user: chaos + authoritative-dns: + ansible_host: authoritative-dns.z9.ccchh.net + ansible_user: chaos +nginx_hosts: + hosts: + light: +ola_hosts: + hosts: + light: +foobazdmx_hosts: + hosts: + light: +infrastructure_authorized_keys_hosts: + hosts: + light: + authoritative-dns: From ff540126a161fbc0cdff8f71bbb4938a7e459e7b Mon Sep 17 00:00:00 2001 From: June Date: Sun, 16 Feb 2025 02:34:14 +0100 Subject: [PATCH 20/46] add chaosknoten to hosts and new hypervisors group Also exclude that group from the tasks otherwise targeting all hosts. 
--- inventories/chaosknoten/hosts.yaml | 5 +++++ playbooks/deploy.yaml | 4 ++-- playbooks/maintenance.yaml | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/inventories/chaosknoten/hosts.yaml b/inventories/chaosknoten/hosts.yaml index 97ef4bb..4949dc5 100644 --- a/inventories/chaosknoten/hosts.yaml +++ b/inventories/chaosknoten/hosts.yaml @@ -4,6 +4,8 @@ all: ansible_host: ccchoir-intern.hamburg.ccc.de ansible_user: chaos ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + chaosknoten: + ansible_host: chaosknoten.hamburg.ccc.de cloud: ansible_host: cloud-intern.hamburg.ccc.de ansible_user: chaos @@ -57,6 +59,9 @@ all: ansible_host: zammad-intern.hamburg.ccc.de ansible_user: chaos ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de +hypervisors: + hosts: + chaosknoten: base_config_hosts: hosts: ccchoir: diff --git a/playbooks/deploy.yaml b/playbooks/deploy.yaml index 66f03de..d7dcdac 100644 --- a/playbooks/deploy.yaml +++ b/playbooks/deploy.yaml @@ -59,8 +59,8 @@ roles: - prometheus_node_exporter -- name: Configure unattended upgrades - hosts: all +- name: Configure unattended upgrades for all non-hypervisors + hosts: all:!hypervisors become: true roles: - role: debops.debops.unattended_upgrades diff --git a/playbooks/maintenance.yaml b/playbooks/maintenance.yaml index 8ce7caa..31c817b 100644 --- a/playbooks/maintenance.yaml +++ b/playbooks/maintenance.yaml @@ -26,8 +26,8 @@ vars: nginx__version_spec: "{{ nextcloud__nginx_version_spec | default('') }}" -- name: Make Sure System Package Are Up-To-Date - hosts: all +- name: Make Sure System Package Are Up-To-Date for all non-hypervisors + hosts: all:!hypervisors roles: - apt_update_and_upgrade From 690ec9bebce851157d1bff5f550af7ae3d927692 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Sun, 16 Feb 2025 22:09:27 +0100 Subject: [PATCH 21/46] Use distinct short code to avoid confusion with EH pretix --- 
.../chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf b/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf index f8a802f..40882d8 100644 --- a/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf +++ b/resources/chaosknoten/tickets/nginx/tickets.hamburg.ccc.de.conf @@ -38,7 +38,11 @@ server { location = / { #return 302 https://wiki.hamburg.ccc.de/infrastructure:service-overview#tickets_pretix; - return 302 https://tickets.hamburg.ccc.de/hackertours/eh22/; + return 302 https://tickets.hamburg.ccc.de/hackertours/eh22ht/; + } + + location = /hackertours/eh22/ { + return 302 https://tickets.hamburg.ccc.de/hackertours/eh22ht/; } location / { From 496f122968a2a8f8a8e500022b4ae96b4b546dd6 Mon Sep 17 00:00:00 2001 From: June Date: Mon, 17 Feb 2025 00:32:50 +0100 Subject: [PATCH 22/46] give ansible docker comp. cron jobs unique names to not overw. each oth. Give them unique names, so the latter doesn't overwrite the former. Also make the task names nicer. 
--- roles/docker_compose/tasks/main.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/roles/docker_compose/tasks/main.yaml b/roles/docker_compose/tasks/main.yaml index 8e211c1..d11d826 100644 --- a/roles/docker_compose/tasks/main.yaml +++ b/roles/docker_compose/tasks/main.yaml @@ -104,18 +104,18 @@ name: anacron state: present -- name: Install automatic update cron job +- name: ensure automatic update cron job is present become: true ansible.builtin.cron: - name: 'docker compose auto update' + name: 'ansible docker compose auto update' minute: "0" hour: "5" job: "cd /ansible_docker_compose; docker compose pull && docker compose up -d" -- name: Install automatic cleanup cron job +- name: ensure automatic cleanup cron job is present become: true ansible.builtin.cron: - name: 'docker compose auto update' + name: 'ansible docker compose auto cleanup' minute: "23" hour: "4" job: "docker system prune -a -f" From 79012fb7f8616cc709d8b3095dea241214494e13 Mon Sep 17 00:00:00 2001 From: June Date: Mon, 17 Feb 2025 01:23:35 +0100 Subject: [PATCH 23/46] eh22-netbox: setup EH22 NetBox --- .../chaosknoten/host_vars/eh22-netbox.yaml | 16 +++++ inventories/chaosknoten/hosts.yaml | 10 ++++ .../eh22-netbox/netbox/configuration.py.j2 | 60 +++++++++++++++++++ .../nginx/netbox.eh22.easterhegg.eu.conf | 48 +++++++++++++++ .../grafana/docker_compose/prometheus.yml | 1 + .../nginx/acme_challenge.conf | 1 + .../public-reverse-proxy/nginx/nginx.conf | 1 + 7 files changed, 137 insertions(+) create mode 100644 inventories/chaosknoten/host_vars/eh22-netbox.yaml create mode 100644 resources/chaosknoten/eh22-netbox/netbox/configuration.py.j2 create mode 100644 resources/chaosknoten/eh22-netbox/nginx/netbox.eh22.easterhegg.eu.conf diff --git a/inventories/chaosknoten/host_vars/eh22-netbox.yaml b/inventories/chaosknoten/host_vars/eh22-netbox.yaml new file mode 100644 index 0000000..56ba344 --- /dev/null +++ b/inventories/chaosknoten/host_vars/eh22-netbox.yaml @@ -0,0 
+1,16 @@ +netbox__version: "v4.1.7" +netbox__db_password: "{{ lookup('community.general.passwordstore', 'noc/vm-secrets/chaosknoten/eh22-netbox/DATABASE_PASSWORD', create=false, missing='error') }}" +netbox__config: "{{ lookup('ansible.builtin.template', 'resources/chaosknoten/eh22-netbox/netbox/configuration.py.j2') }}" +netbox__custom_pipeline_oidc_group_and_role_mapping: true + +nginx__version_spec: "" +nginx__configurations: + - name: netbox.eh22.easterhegg.eu + content: "{{ lookup('ansible.builtin.file', 'resources/chaosknoten/eh22-netbox/nginx/netbox.eh22.easterhegg.eu.conf') }}" + +certbot__version_spec: "" +certbot__acme_account_email_address: j+letsencrypt-ccchh@jsts.xyz +certbot__certificate_domains: + - "netbox.eh22.easterhegg.eu" +certbot__new_cert_commands: + - "systemctl reload nginx.service" diff --git a/inventories/chaosknoten/hosts.yaml b/inventories/chaosknoten/hosts.yaml index 4949dc5..cae283d 100644 --- a/inventories/chaosknoten/hosts.yaml +++ b/inventories/chaosknoten/hosts.yaml @@ -10,6 +10,10 @@ all: ansible_host: cloud-intern.hamburg.ccc.de ansible_user: chaos ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de + eh22-netbox: + ansible_host: eh22-netbox-intern.hamburg.ccc.de + ansible_user: chaos + ansible_ssh_common_args: -J ssh://chaos@public-reverse-proxy.hamburg.ccc.de eh22-wiki: ansible_host: eh22-wiki-intern.hamburg.ccc.de ansible_user: chaos @@ -66,6 +70,7 @@ base_config_hosts: hosts: ccchoir: cloud: + eh22-netbox: eh22-wiki: grafana: keycloak: @@ -96,6 +101,7 @@ nextcloud_hosts: nginx_hosts: hosts: ccchoir: + eh22-netbox: eh22-wiki: grafana: tickets: @@ -115,6 +121,7 @@ public_reverse_proxy_hosts: certbot_hosts: hosts: ccchoir: + eh22-netbox: eh22-wiki: grafana: tickets: @@ -130,6 +137,7 @@ certbot_hosts: prometheus_node_exporter_hosts: hosts: ccchoir: + eh22-netbox: eh22-wiki: tickets: keycloak: @@ -142,6 +150,7 @@ prometheus_node_exporter_hosts: infrastructure_authorized_keys_hosts: hosts: ccchoir: + 
eh22-netbox: eh22-wiki: grafana: tickets: @@ -160,4 +169,5 @@ wiki_hosts: wiki: netbox_hosts: hosts: + eh22-netbox: netbox: diff --git a/resources/chaosknoten/eh22-netbox/netbox/configuration.py.j2 b/resources/chaosknoten/eh22-netbox/netbox/configuration.py.j2 new file mode 100644 index 0000000..56995ca --- /dev/null +++ b/resources/chaosknoten/eh22-netbox/netbox/configuration.py.j2 @@ -0,0 +1,60 @@ +ALLOWED_HOSTS = [ "netbox.eh22.easterhegg.eu" ] +DATABASE = { + "HOST": "localhost", + "NAME": "netbox", + "USER": "netbox", + "PASSWORD": "{{ lookup('community.general.passwordstore', 'noc/vm-secrets/chaosknoten/eh22-netbox/DATABASE_PASSWORD', create=false, missing='error') }}", +} +REDIS = { + "tasks": { + "HOST": "localhost", + "PORT": 6379, + "USERNAME": "", + "PASSWORD": "", + "DATABASE": 0, + "SSL": False, + }, + "caching": { + "HOST": "localhost", + "PORT": 6379, + "USERNAME": "", + "PASSWORD": "", + "DATABASE": 1, + "SSL": False, + }, +} +SECRET_KEY = "{{ lookup('community.general.passwordstore', 'noc/vm-secrets/chaosknoten/eh22-netbox/SECRET_KEY', create=false, missing='error') }}" +SESSION_COOKIE_SECURE = True + +# CCCHH ID (Keycloak) integration. 
+# https://github.com/python-social-auth/social-core/blob/0925304a9e437f8b729862687d3a808c7fb88a95/social_core/backends/keycloak.py#L7 +# https://python-social-auth.readthedocs.io/en/latest/backends/keycloak.html +REMOTE_AUTH_BACKEND = "social_core.backends.keycloak.KeycloakOAuth2" +SOCIAL_AUTH_KEYCLOAK_ACCESS_TOKEN_URL = ( + "https://id.hamburg.ccc.de/realms/ccchh/protocol/openid-connect/token" +) +SOCIAL_AUTH_KEYCLOAK_AUTHORIZATION_URL = ( + "https://id.hamburg.ccc.de/realms/ccchh/protocol/openid-connect/auth" +) +SOCIAL_AUTH_KEYCLOAK_KEY = "eh22-netbox" +SOCIAL_AUTH_KEYCLOAK_PUBLIC_KEY = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAi/Shi+b2OyYNGVFPsa6qf9SesEpRl5U5rpwgmt8H7NawMvwpPUYVW9o46QW0ulYcDmysT3BzpP3tagO/SFNoOjZdYe0D9nJ7vEp8KHbzR09KCfkyQIi0wLssKnDotVHL5JeUY+iKk+gjiwF9FSFSHPBqsST7hXVAut9LkOvs2aDod9AzbTH/uYbt4wfUm5l/1Ii8D+K7YcsFGUIqxv4XS/ylKqObqN4M2dac69iIwapoh6reaBQEm66vrOzJ+3yi4DZuPrkShJqi2hddtoyZihyCkF+eJJKEI5LrBf1KZB3Ec2YUrqk93ZGUGs/XY6R87QSfR3hJ82B1wnF+c2pw+QIDAQAB" +SOCIAL_AUTH_KEYCLOAK_SECRET = "{{ lookup('community.general.passwordstore', 'noc/vm-secrets/chaosknoten/eh22-netbox/SOCIAL_AUTH_KEYCLOAK_SECRET', create=false, missing='error') }}" +# Use custom OIDC group and role mapping pipeline functions added in via +# netbox__custom_pipeline_oidc_group_and_role_mapping. +# The default pipeline this is based on can be found here: +# https://github.com/netbox-community/netbox/blob/main/netbox/netbox/settings.py +SOCIAL_AUTH_PIPELINE = [ + "social_core.pipeline.social_auth.social_details", + "social_core.pipeline.social_auth.social_uid", + "social_core.pipeline.social_auth.social_user", + "social_core.pipeline.user.get_username", + "social_core.pipeline.user.create_user", + "social_core.pipeline.social_auth.associate_user", + "netbox.authentication.user_default_groups_handler", + "social_core.pipeline.social_auth.load_extra_data", + "social_core.pipeline.user.user_details", + # Custom OIDC group and role mapping functions. 
+ "netbox.custom_pipeline_oidc_mapping.add_groups", + "netbox.custom_pipeline_oidc_mapping.remove_groups", + "netbox.custom_pipeline_oidc_mapping.set_roles", +] diff --git a/resources/chaosknoten/eh22-netbox/nginx/netbox.eh22.easterhegg.eu.conf b/resources/chaosknoten/eh22-netbox/nginx/netbox.eh22.easterhegg.eu.conf new file mode 100644 index 0000000..6c9d458 --- /dev/null +++ b/resources/chaosknoten/eh22-netbox/nginx/netbox.eh22.easterhegg.eu.conf @@ -0,0 +1,48 @@ +# partly generated 2022-01-08, Mozilla Guideline v5.6, nginx 1.17.7, OpenSSL 1.1.1k, intermediate configuration +# https://ssl-config.mozilla.org/#server=nginx&version=1.17.7&config=intermediate&openssl=1.1.1k&guideline=5.6 +server { + # Listen on a custom port for the proxy protocol. + listen 8443 ssl http2 proxy_protocol; + # Make use of the ngx_http_realip_module to set the $remote_addr and + # $remote_port to the client address and client port, when using proxy + # protocol. + # First set our proxy protocol proxy as trusted. + set_real_ip_from 172.31.17.140; + # Then tell the realip_module to get the addresses from the proxy protocol + # header. + real_ip_header proxy_protocol; + + server_name netbox.eh22.easterhegg.eu; + + ssl_certificate /etc/letsencrypt/live/netbox.eh22.easterhegg.eu/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/netbox.eh22.easterhegg.eu/privkey.pem; + # verify chain of trust of OCSP response using Root CA and Intermediate certs + ssl_trusted_certificate /etc/letsencrypt/live/netbox.eh22.easterhegg.eu/chain.pem; + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + add_header Strict-Transport-Security "max-age=63072000" always; + + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Port 443; + # This is https in any case. + proxy_set_header X-Forwarded-Proto https; + # Hide the X-Forwarded header. 
+ proxy_hide_header X-Forwarded; + # Assume we are the only Reverse Proxy (well using Proxy Protocol, but that + # is transparent). + # Also provide "_hidden" for by, since it's not relevant. + proxy_set_header Forwarded "for=$remote_addr;proto=https;host=$host;by=_hidden"; + + client_max_body_size 25m; + + location /static/ { + alias /opt/netbox/netbox/static/; + } + + location / { + proxy_pass http://127.0.0.1:8001; + } +} diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus.yml b/resources/chaosknoten/grafana/docker_compose/prometheus.yml index 59fae01..6aeb56e 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus.yml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus.yml @@ -83,6 +83,7 @@ scrape_configs: - public-web-static-intern.hamburg.ccc.de:9100 - git-intern.hamburg.ccc.de:9100 - forgejo-actions-runner-intern.hamburg.ccc.de:9100 + - eh22-netbox-intern.hamburg.ccc.de:9100 - eh22-wiki-intern.hamburg.ccc.de:9100 - nix-box-june-intern.hamburg.ccc.de:9100 - mjolnir-intern.hamburg.ccc.de:9100 diff --git a/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf b/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf index c3f9fed..4e0e8e3 100644 --- a/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf +++ b/resources/chaosknoten/public-reverse-proxy/nginx/acme_challenge.conf @@ -71,6 +71,7 @@ map $host $upstream_acme_challenge_host { hydra.hamburg.ccc.de 172.31.17.163:31820; cfp.eh22.easterhegg.eu 172.31.17.157:31820; hub.eh22.easterhegg.eu eh22hub-intern.hamburg.ccc.de:31820; + netbox.eh22.easterhegg.eu eh22-netbox-intern.hamburg.ccc.de:31820; default ""; } diff --git a/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf b/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf index dfcf8d2..4a7f84c 100644 --- a/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf +++ b/resources/chaosknoten/public-reverse-proxy/nginx/nginx.conf @@ -89,6 +89,7 @@ stream { 
hydra.hamburg.ccc.de 172.31.17.163:8443; cfp.eh22.easterhegg.eu pretalx-intern.hamburg.ccc.de:8443; hub.eh22.easterhegg.eu eh22hub-intern.hamburg.ccc.de:8443; + netbox.eh22.easterhegg.eu eh22-netbox-intern.hamburg.ccc.de:8443; } server { From 7271426502bee5d43d4169b6c9b939d1bfe3ae4b Mon Sep 17 00:00:00 2001 From: Dario Date: Mon, 17 Feb 2025 22:20:25 +0100 Subject: [PATCH 24/46] dokuwiki: add option to checkout custom design repo --- roles/dokuwiki/README.md | 6 ++++++ roles/dokuwiki/defaults/main.yml | 3 +++ roles/dokuwiki/tasks/main.yml | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/roles/dokuwiki/README.md b/roles/dokuwiki/README.md index 457dd45..b6bd80d 100644 --- a/roles/dokuwiki/README.md +++ b/roles/dokuwiki/README.md @@ -22,6 +22,12 @@ None. - `dokuwiki__php_version`: Your PHP version, default `7.4` for Debian 11 and `8.2` for Debian 12 - `dokuwiki__php_user`: User of your php-fpm process, default `www-data` - `dokuwiki__nginx_user`: User of your nginx process, default `nginx` +- `dokuwiki__custom_theme_git_url`: Clone URL of custom theme (HTTPS only) +- `dokuwiki__custom_theme_version`: Version (git branch, tag, ...) to checkout, default main +- `dokuwiki__custom_theme_name`: Name of the directory into which the custom theme is cloned + +Warning: if a directory of the same name as `dokuwiki__custom_theme_name` already exists in the same directory, the task will fail. +This needs to be manually deleted first before the first run with the custom theme enabled. 
## nginx Configuration diff --git a/roles/dokuwiki/defaults/main.yml b/roles/dokuwiki/defaults/main.yml index 8358114..33ba384 100644 --- a/roles/dokuwiki/defaults/main.yml +++ b/roles/dokuwiki/defaults/main.yml @@ -3,3 +3,6 @@ dokuwiki__installpath: "/var/www/dokuwiki" dokuwiki__php_version: "{{ dokuwiki__php_versions[ansible_distribution + '-' + ansible_distribution_major_version] }}" dokuwiki__php_user: "www-data" dokuwiki__nginx_user: "nginx" +dokuwiki__custom_theme_git_url: "" +dokuwiki__custom_theme_version: "main" +dokuwiki__custom_theme_name: "" diff --git a/roles/dokuwiki/tasks/main.yml b/roles/dokuwiki/tasks/main.yml index 664f159..8c836ce 100644 --- a/roles/dokuwiki/tasks/main.yml +++ b/roles/dokuwiki/tasks/main.yml @@ -1,4 +1,4 @@ -- name: Install php-fpm +- name: Install dependencies become: true ansible.builtin.apt: name: @@ -9,6 +9,7 @@ - php-intl - php-gd - php-sqlite3 + - git diff: false - name: Ensure `php-fpm` is enabled @@ -42,3 +43,17 @@ owner: root group: root mode: "0644" + +- name: checkout custom theme git repo + become: true + ansible.builtin.git: + repo: "{{ dokuwiki__custom_theme_git_url }}" + dest: "{{ dokuwiki__installpath }}/lib/tpl/{{ dokuwiki__custom_theme_name }}" + version: "{{ dokuwiki__custom_theme_version }}" + force: true + depth: 1 + single_branch: true + track_submodules: true + when: + - dokuwiki__custom_theme_git_url != "" + - dokuwiki__custom_theme_name != "" From 5dc2520ecd72058b505d707c42bbb60cca8ad4c9 Mon Sep 17 00:00:00 2001 From: Dario Date: Mon, 17 Feb 2025 22:26:17 +0100 Subject: [PATCH 25/46] eh22-wiki: configure custom eh22-theme --- inventories/chaosknoten/host_vars/eh22-wiki.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/inventories/chaosknoten/host_vars/eh22-wiki.yaml b/inventories/chaosknoten/host_vars/eh22-wiki.yaml index a8814c0..4b7ce47 100644 --- a/inventories/chaosknoten/host_vars/eh22-wiki.yaml +++ b/inventories/chaosknoten/host_vars/eh22-wiki.yaml @@ -9,3 +9,6 @@ 
certbot__certificate_domains: - "eh22.easterhegg.eu" certbot__new_cert_commands: - "systemctl reload nginx.service" +dokuwiki__custom_theme_git_url: "https://git.hamburg.ccc.de/EH22/dokuwiki-template-sprintdoc-modified.git" +dokuwiki__custom_theme_version: "eh22" +dokuwiki__custom_theme_name: "sprintdoc" From 172d6472bc66258c86830d6997e15020e68113a6 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 02:59:00 +0100 Subject: [PATCH 26/46] nginx(role): remove unnecessary apt-get update step The nginx package gets installed with "update_cache: true" afterwards anyway, so the apt-get update step shouldn't be necessary. --- roles/nginx/handlers/main.yaml | 5 ----- roles/nginx/tasks/main/repo_setup.yaml | 6 ------ 2 files changed, 11 deletions(-) diff --git a/roles/nginx/handlers/main.yaml b/roles/nginx/handlers/main.yaml index bc420db..57e07fc 100644 --- a/roles/nginx/handlers/main.yaml +++ b/roles/nginx/handlers/main.yaml @@ -3,8 +3,3 @@ name: nginx.service state: restarted become: true - -- name: apt-get update - ansible.builtin.apt: - update_cache: true - become: true diff --git a/roles/nginx/tasks/main/repo_setup.yaml b/roles/nginx/tasks/main/repo_setup.yaml index 9edc156..253beb1 100644 --- a/roles/nginx/tasks/main/repo_setup.yaml +++ b/roles/nginx/tasks/main/repo_setup.yaml @@ -18,21 +18,18 @@ owner: root group: root become: true - notify: apt-get update - name: make sure NGINX APT repository is added ansible.builtin.apt_repository: repo: "deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" state: present become: true - notify: apt-get update - name: make sure NGINX APT source repository is added ansible.builtin.apt_repository: repo: "deb-src [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" state: present become: true - notify: apt-get update - name: set up repository pinning to make sure 
nginx package gets installed from NGINX repositories ansible.builtin.copy: @@ -46,6 +43,3 @@ group: root mode: "0644" become: true - -- name: Flush handlers to make sure "apt-get update" handler runs, if needed - ansible.builtin.meta: flush_handlers From df22074aeb78fa1f4edee001cbb5673629a087c4 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 03:59:08 +0100 Subject: [PATCH 27/46] nginx(role): simplify installation by removing version spec We always just want the latest anyway and therefore don't use it, so no need to keep the complexity introduced by that setting. Also merge repo_setup and nginx_install task lists into one nginx_install task list as keeping two files isn't necessary. Finally improving naming a bit. --- roles/nginx/README.md | 4 -- roles/nginx/meta/argument_specs.yaml | 9 ---- roles/nginx/tasks/main.yaml | 7 +-- roles/nginx/tasks/main/nginx_install.yaml | 53 ++++++++++++++++++++--- roles/nginx/tasks/main/repo_setup.yaml | 45 ------------------- 5 files changed, 47 insertions(+), 71 deletions(-) delete mode 100644 roles/nginx/tasks/main/repo_setup.yaml diff --git a/roles/nginx/README.md b/roles/nginx/README.md index 9abf2ea..e162123 100644 --- a/roles/nginx/README.md +++ b/roles/nginx/README.md @@ -20,10 +20,6 @@ The following distributions are supported: For the required arguments look at the [`argument_specs.yaml`](./meta/argument_specs.yaml). -## Updates - -This role updates NGINX to the latest version covered by the provided version spec., if needed. - ## `hosts` The `hosts` for this role need to be the machines, for which you want to make sure the `nginx` package is installed from the NGINX repos and a desirable baseline of NGINX configs is deployed. 
diff --git a/roles/nginx/meta/argument_specs.yaml b/roles/nginx/meta/argument_specs.yaml index d79ba9e..693e196 100644 --- a/roles/nginx/meta/argument_specs.yaml +++ b/roles/nginx/meta/argument_specs.yaml @@ -1,15 +1,6 @@ argument_specs: main: options: - nginx__version_spec: - description: >- - The version specification to use for installing the `nginx` package. The - provided version specification will be used like the following: `nginx={{ - nginx__version_spec }}*`. This makes it possible to e.g. specify - until a minor version (like `1.3.`) and then have patch versions be - installed automatically (like `1.3.1` and so on). - type: str - required: true nginx__deploy_redirect_conf: description: >- Whether or not to deploy a `redirect.conf` to diff --git a/roles/nginx/tasks/main.yaml b/roles/nginx/tasks/main.yaml index 6ecb2da..89c9be2 100644 --- a/roles/nginx/tasks/main.yaml +++ b/roles/nginx/tasks/main.yaml @@ -3,12 +3,7 @@ name: nginx tasks_from: make_sure_nginx_configuration_names_are_valid -- name: make sure NGINX repos are setup - ansible.builtin.include_role: - name: nginx - tasks_from: main/repo_setup - -- name: make sure NGINX is installed +- name: ensure NGINX is installed ansible.builtin.include_role: name: nginx tasks_from: main/nginx_install diff --git a/roles/nginx/tasks/main/nginx_install.yaml b/roles/nginx/tasks/main/nginx_install.yaml index 6d63ad3..b58ec69 100644 --- a/roles/nginx/tasks/main/nginx_install.yaml +++ b/roles/nginx/tasks/main/nginx_install.yaml @@ -1,13 +1,52 @@ -- name: make sure the `nginx` package is installed +- name: gather package facts + ansible.builtin.package_facts: + manager: apt + +- name: make sure `gnupg` package is installed ansible.builtin.apt: - name: nginx={{ nginx__version_spec }}* + name: gnupg state: present - allow_change_held_packages: true update_cache: true become: true + when: "'gnupg' not in ansible_facts.packages" -- name: apt-mark hold `nginx` - ansible.builtin.dpkg_selections: - name: nginx - selection: 
hold +- name: make sure NGINX signing key is added + ansible.builtin.get_url: + url: https://nginx.org/keys/nginx_signing.key + dest: /etc/apt/trusted.gpg.d/nginx.asc + mode: "0644" + owner: root + group: root + become: true + +- name: make sure NGINX APT repository is added + ansible.builtin.apt_repository: + repo: "deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" + state: present + become: true + +- name: make sure NGINX APT source repository is added + ansible.builtin.apt_repository: + repo: "deb-src [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" + state: present + become: true + +- name: set up repository pinning to make sure nginx package gets installed from NGINX repositories + ansible.builtin.copy: + content: | + Package: * + Pin: origin nginx.org + Pin: release o=nginx + Pin-Priority: 900 + dest: /etc/apt/preferences.d/99nginx + owner: root + group: root + mode: "0644" + become: true + +- name: Ensure nginx is installed + ansible.builtin.apt: + name: nginx + state: present + update_cache: true become: true diff --git a/roles/nginx/tasks/main/repo_setup.yaml b/roles/nginx/tasks/main/repo_setup.yaml deleted file mode 100644 index 253beb1..0000000 --- a/roles/nginx/tasks/main/repo_setup.yaml +++ /dev/null @@ -1,45 +0,0 @@ -- name: gather package facts - ansible.builtin.package_facts: - manager: apt - -- name: make sure `gnupg` package is installed - ansible.builtin.apt: - name: gnupg - state: present - update_cache: true - become: true - when: "'gnupg' not in ansible_facts.packages" - -- name: make sure NGINX signing key is added - ansible.builtin.get_url: - url: https://nginx.org/keys/nginx_signing.key - dest: /etc/apt/trusted.gpg.d/nginx.asc - mode: "0644" - owner: root - group: root - become: true - -- name: make sure NGINX APT repository is added - ansible.builtin.apt_repository: - 
repo: "deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" - state: present - become: true - -- name: make sure NGINX APT source repository is added - ansible.builtin.apt_repository: - repo: "deb-src [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" - state: present - become: true - -- name: set up repository pinning to make sure nginx package gets installed from NGINX repositories - ansible.builtin.copy: - content: | - Package: * - Pin: origin nginx.org - Pin: release o=nginx - Pin-Priority: 900 - dest: /etc/apt/preferences.d/99nginx - owner: root - group: root - mode: "0644" - become: true From 2e9f73b46bf0bc1d226a5dd2e046765bd037d5b7 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 03:11:46 +0100 Subject: [PATCH 28/46] nginx(role): simplify ensuring that gnupg is installed Also improve naming. --- roles/nginx/tasks/main/nginx_install.yaml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/roles/nginx/tasks/main/nginx_install.yaml b/roles/nginx/tasks/main/nginx_install.yaml index b58ec69..a877c67 100644 --- a/roles/nginx/tasks/main/nginx_install.yaml +++ b/roles/nginx/tasks/main/nginx_install.yaml @@ -1,14 +1,8 @@ -- name: gather package facts - ansible.builtin.package_facts: - manager: apt - -- name: make sure `gnupg` package is installed +- name: Ensure gnupg is installed ansible.builtin.apt: name: gnupg state: present - update_cache: true become: true - when: "'gnupg' not in ansible_facts.packages" - name: make sure NGINX signing key is added ansible.builtin.get_url: From 4adf39d969357e20f40c2ec15d9fba20b96468be Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 03:27:49 +0100 Subject: [PATCH 29/46] nginx(role): document arguments in README for better discoverability Document the role arguments in the README instead of in the argument_specs for better 
discoverability and readability. --- roles/nginx/README.md | 24 +++++++++++++++++++++++- roles/nginx/meta/argument_specs.yaml | 25 ------------------------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/roles/nginx/README.md b/roles/nginx/README.md index e162123..343e50b 100644 --- a/roles/nginx/README.md +++ b/roles/nginx/README.md @@ -18,7 +18,29 @@ The following distributions are supported: ## Required Arguments -For the required arguments look at the [`argument_specs.yaml`](./meta/argument_specs.yaml). +None. + +## Optional Arguments + +- `nginx__deploy_redirect_conf`: Whether or not to deploy a config redirecting from HTTP to HTTPS, while still forwarding the `/.well-known/acme-challenge/` to localhost Port 31820 for certificate issuing. + See [`files/redirect.conf`](./files/redirect.conf) for the configuration that would be deployed. + Defaults to `true`. +- `nginx__deploy_tls_conf`: Whether or not to deploy a config configuring some TLS settings reasonably. + See [`files/tls.conf`](./files/tls.conf) for the configuration that would be deployed. + Defaults to `true`. +- `nginx__deploy_logging_conf`: Whether or not to deploy a config configuring logging to journald. + See [`files/logging.conf`](./files/logging.conf) for the configuration that would be deployed. + Defaults to `true`. +- `nginx__configurations`: List of nginx configurations to ensure are deployed. +- `nginx__configurations.*.name`: This name with `.conf` appended will be used for the configurations file name under `/etc/nginx/conf.d/`. + `tls` and `redirect` are reserved names. +- `nginx__configurations.*.content`: This configurations content. +- `nginx__use_custom_nginx_conf`: Whether or not to use a custom `/etc/nginx/nginx.conf`. + If set to true, you must provide the content for a custom `nginx.conf` via `nginx__custom_nginx_conf`. + Defaults to `false`. +- `nginx__custom_nginx_conf`: The content to use for the custom `nginx.conf`. 
+ Needs `nginx__use_custom_nginx_conf` to be set to true to work. + You should probably still make sure that your custom `nginx.conf` includes `/etc/nginx/conf.d/*.conf`, so that the other configuration files still work. ## `hosts` diff --git a/roles/nginx/meta/argument_specs.yaml b/roles/nginx/meta/argument_specs.yaml index 693e196..866cb81 100644 --- a/roles/nginx/meta/argument_specs.yaml +++ b/roles/nginx/meta/argument_specs.yaml @@ -2,21 +2,14 @@ argument_specs: main: options: nginx__deploy_redirect_conf: - description: >- - Whether or not to deploy a `redirect.conf` to - `/etc/nginx/conf.d/redirect.conf`. type: bool required: false default: true nginx__deploy_tls_conf: - description: >- - Whether or not to deploy a `tls.conf` to `/etc/nginx/conf.d/tls.conf`. type: bool required: false default: true nginx__deploy_logging_conf: - description: >- - Whether or not to deploy a `logging.conf` to `/etc/nginx/conf.d/logging.conf`. type: bool required: false default: true @@ -28,34 +21,16 @@ argument_specs: default: [ ] options: name: - description: >- - The name of the configuration file, where the configuration should - be deployed to. The file will be placed under `/etc/nginx/conf.d/` - and `.conf` will be appended to the given name. So in the end the - path will be like this: `/etc/nginx/conf.d/\{\{ name \}\}.conf`. - Note that the names `tls` and `redirect` aren't allowed. type: str required: true content: - description: The content of the configuration. type: str required: true nginx__use_custom_nginx_conf: - description: >- - Whether or not to use a custom `/etc/nginx/nginx.conf`. If set to - true, you must provide a custom `nginx.conf` via - `nginx__custom_nginx_conf`. type: bool required: false default: false nginx__custom_nginx_conf: - description: >- - The value for a `nginx.conf` to be placed at `/etc/nginx/nginx.conf`. - You must set `nginx__use_custom_nginx_conf` to true for this value to - be used. 
- You should probably make sure that your custom `nginx.conf` still - includes `/etc/nginx/conf.d/*.conf` so that the configuration provided - using `nginx__configurations` still work. type: str required: false default: "" From 3d22f7ffae6ca142c253cf8999543a8e8a14ac61 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 03:35:37 +0100 Subject: [PATCH 30/46] nginx(role): document Debian 12 support --- roles/nginx/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/roles/nginx/README.md b/roles/nginx/README.md index 343e50b..88467c3 100644 --- a/roles/nginx/README.md +++ b/roles/nginx/README.md @@ -15,6 +15,7 @@ The entry points available for external use are: The following distributions are supported: - Debian 11 +- Debian 12 ## Required Arguments From 5809e4015f15fc09624ed0102c84960f9a82e314 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 03:37:20 +0100 Subject: [PATCH 31/46] nginx(role): add "logging" to the reserved configuration names --- roles/nginx/README.md | 2 +- .../tasks/make_sure_nginx_configuration_names_are_valid.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/roles/nginx/README.md b/roles/nginx/README.md index 88467c3..c399c50 100644 --- a/roles/nginx/README.md +++ b/roles/nginx/README.md @@ -34,7 +34,7 @@ None. Defaults to `true`. - `nginx__configurations`: List of nginx configurations to ensure are deployed. - `nginx__configurations.*.name`: This name with `.conf` appended will be used for the configurations file name under `/etc/nginx/conf.d/`. - `tls` and `redirect` are reserved names. + `tls`, `redirect` and `logging` are reserved names. - `nginx__configurations.*.content`: This configurations content. - `nginx__use_custom_nginx_conf`: Whether or not to use a custom `/etc/nginx/nginx.conf`. If set to true, you must provide the content for a custom `nginx.conf` via `nginx__custom_nginx_conf`. 
diff --git a/roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml b/roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml index 54ea6f5..234b12c 100644 --- a/roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml +++ b/roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml @@ -3,4 +3,5 @@ msg: "You used the following name: `{{ item.name }}`. Please make sure to not use the following names: `tls`, `redirect`." when: item.name == "tls" or item.name == "redirect" + or item.name == "logging" loop: "{{ nginx__configurations }}" From 9b2ef55f867646fa3c24dc5c99a198089f58bab2 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 03:40:00 +0100 Subject: [PATCH 32/46] nginx(role): remove unneces. "hosts" and "entry points" sec. from README --- roles/nginx/README.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/roles/nginx/README.md b/roles/nginx/README.md index c399c50..f990c67 100644 --- a/roles/nginx/README.md +++ b/roles/nginx/README.md @@ -4,12 +4,6 @@ Makes sure the `nginx` package is installed from the NGINX repos on the specifie Also makes sure a desirable baseline of NGINX configs is deployed on the specified hosts. For the NGINX site configurations the config template below can be used. -## Entry Points - -The entry points available for external use are: - -- `main` - ## Supported Distributions The following distributions are supported: @@ -43,10 +37,6 @@ None. Needs `nginx__use_custom_nginx_conf` to be set to true to work. You should probably still make sure that your custom `nginx.conf` includes `/etc/nginx/conf.d/*.conf`, so that the other configuration files still work. -## `hosts` - -The `hosts` for this role need to be the machines, for which you want to make sure the `nginx` package is installed from the NGINX repos and a desirable baseline of NGINX configs is deployed. 
- ## Config Template Here's a config template, which can be used for new NGINX site configs, which you can supply to this role using the `nginx__configurations` argument. From 344dd78981a5ba8db7932028143ef7e5bad7f969 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 04:07:07 +0100 Subject: [PATCH 33/46] nginx(role): use better naming, wording and file structure --- roles/nginx/handlers/main.yaml | 2 +- roles/nginx/tasks/main.yaml | 21 +++--- .../tasks/main/01_validate_config_names.yaml | 7 ++ ...inx_install.yaml => 02_nginx_install.yaml} | 8 +-- ...nfig_deploy.yaml => 03_config_deploy.yaml} | 68 +++++++++---------- ...e_nginx_configuration_names_are_valid.yaml | 7 -- 6 files changed, 55 insertions(+), 58 deletions(-) create mode 100644 roles/nginx/tasks/main/01_validate_config_names.yaml rename roles/nginx/tasks/main/{nginx_install.yaml => 02_nginx_install.yaml} (81%) rename roles/nginx/tasks/main/{config_deploy.yaml => 03_config_deploy.yaml} (62%) delete mode 100644 roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml diff --git a/roles/nginx/handlers/main.yaml b/roles/nginx/handlers/main.yaml index 57e07fc..0a366e9 100644 --- a/roles/nginx/handlers/main.yaml +++ b/roles/nginx/handlers/main.yaml @@ -1,4 +1,4 @@ -- name: Restart `nginx.service` +- name: Restart nginx ansible.builtin.systemd: name: nginx.service state: restarted diff --git a/roles/nginx/tasks/main.yaml b/roles/nginx/tasks/main.yaml index 89c9be2..4a86530 100644 --- a/roles/nginx/tasks/main.yaml +++ b/roles/nginx/tasks/main.yaml @@ -1,14 +1,11 @@ -- name: make sure nginx configuration names are valid - ansible.builtin.include_role: - name: nginx - tasks_from: make_sure_nginx_configuration_names_are_valid +- name: Ensure valid configuration names + ansible.builtin.import_tasks: + file: main/01_validate_config_names.yaml -- name: ensure NGINX is installed - ansible.builtin.include_role: - name: nginx - tasks_from: main/nginx_install +- name: Ensure nginx is installed + 
ansible.builtin.import_tasks: + file: main/02_nginx_install.yaml -- name: make sure desirable NGINX configs are deployed - ansible.builtin.include_role: - name: nginx - tasks_from: main/config_deploy +- name: Ensure configuration deployment + ansible.builtin.import_tasks: + file: main/03_config_deploy.yaml diff --git a/roles/nginx/tasks/main/01_validate_config_names.yaml b/roles/nginx/tasks/main/01_validate_config_names.yaml new file mode 100644 index 0000000..7991b89 --- /dev/null +++ b/roles/nginx/tasks/main/01_validate_config_names.yaml @@ -0,0 +1,7 @@ +- name: Ensure that the given configuration names are valid + ansible.builtin.fail: + msg: "You used one of the reserved configuration names: '{{ item.name }}'." + when: item.name == "tls" + or item.name == "redirect" + or item.name == "logging" + loop: "{{ nginx__configurations }}" diff --git a/roles/nginx/tasks/main/nginx_install.yaml b/roles/nginx/tasks/main/02_nginx_install.yaml similarity index 81% rename from roles/nginx/tasks/main/nginx_install.yaml rename to roles/nginx/tasks/main/02_nginx_install.yaml index a877c67..9ceb323 100644 --- a/roles/nginx/tasks/main/nginx_install.yaml +++ b/roles/nginx/tasks/main/02_nginx_install.yaml @@ -4,7 +4,7 @@ state: present become: true -- name: make sure NGINX signing key is added +- name: Ensure NGINX signing key is added ansible.builtin.get_url: url: https://nginx.org/keys/nginx_signing.key dest: /etc/apt/trusted.gpg.d/nginx.asc @@ -13,19 +13,19 @@ group: root become: true -- name: make sure NGINX APT repository is added +- name: Ensure NGINX APT repository is added ansible.builtin.apt_repository: repo: "deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" state: present become: true -- name: make sure NGINX APT source repository is added +- name: Ensure NGINX APT source repository is added ansible.builtin.apt_repository: repo: "deb-src [arch=amd64 
signed-by=/etc/apt/trusted.gpg.d/nginx.asc] https://nginx.org/packages/debian/ {{ ansible_distribution_release }} nginx" state: present become: true -- name: set up repository pinning to make sure nginx package gets installed from NGINX repositories +- name: Ensure repository pinning to make sure nginx package gets installed from NGINX repositories is set up ansible.builtin.copy: content: | Package: * diff --git a/roles/nginx/tasks/main/config_deploy.yaml b/roles/nginx/tasks/main/03_config_deploy.yaml similarity index 62% rename from roles/nginx/tasks/main/config_deploy.yaml rename to roles/nginx/tasks/main/03_config_deploy.yaml index 01580b1..2f0c834 100644 --- a/roles/nginx/tasks/main/config_deploy.yaml +++ b/roles/nginx/tasks/main/03_config_deploy.yaml @@ -1,13 +1,13 @@ -- name: check, if a save of a previous `nginx.conf` is present +- name: Check, if a save of a previous `nginx.conf` is present ansible.builtin.stat: path: /etc/nginx/nginx.conf.ansiblesave - register: nginx__nginx_conf_ansiblesave_stat_result + register: nginx__nginx_conf_ansiblesave_stat -- name: handle the case, where a custom `nginx.conf` is to be used +- name: Handle the case, where a custom `nginx.conf` is to be used when: nginx__use_custom_nginx_conf block: - - name: when no `nginx.conf.ansiblesave` is present, save the current `nginx.conf` - when: not nginx__nginx_conf_ansiblesave_stat_result.stat.exists + - name: When no `nginx.conf.ansiblesave` is present, save the current `nginx.conf` + when: not nginx__nginx_conf_ansiblesave_stat.stat.exists ansible.builtin.copy: force: true dest: /etc/nginx/nginx.conf.ansiblesave @@ -18,7 +18,7 @@ src: /etc/nginx/nginx.conf become: true - - name: deploy the custom `nginx.conf` + - name: Ensure the custom `nginx.conf` is deployed ansible.builtin.copy: content: "{{ nginx__custom_nginx_conf }}" dest: "/etc/nginx/nginx.conf" @@ -26,13 +26,13 @@ owner: root group: root become: true - notify: Restart `nginx.service` + notify: Restart nginx -- name: handle 
the case, where no custom `nginx.conf` is to be used +- name: Handle the case, where no custom `nginx.conf` is to be used when: not nginx__use_custom_nginx_conf block: - - name: when a `nginx.conf.ansiblesave` is present, copy it to `nginx.conf` - when: nginx__nginx_conf_ansiblesave_stat_result.stat.exists + - name: When a `nginx.conf.ansiblesave` is present, copy it to `nginx.conf` + when: nginx__nginx_conf_ansiblesave_stat.stat.exists ansible.builtin.copy: force: true dest: /etc/nginx/nginx.conf @@ -42,32 +42,32 @@ remote_src: true src: /etc/nginx/nginx.conf.ansiblesave become: true - notify: Restart `nginx.service` + notify: Restart nginx - - name: delete the `nginx.conf.ansiblesave`, if it is present - when: nginx__nginx_conf_ansiblesave_stat_result.stat.exists + - name: Ensure no `nginx.conf.ansiblesave` is present + when: nginx__nginx_conf_ansiblesave_stat.stat.exists ansible.builtin.file: path: /etc/nginx/nginx.conf.ansiblesave state: absent become: true -- name: make sure mozilla dhparam is deployed +- name: Ensure mozilla dhparam is deployed ansible.builtin.get_url: force: true dest: /etc/nginx-mozilla-dhparam mode: "0644" url: https://ssl-config.mozilla.org/ffdhe2048.txt become: true - notify: Restart `nginx.service` + notify: Restart nginx -- name: set `nginx__config_files_to_exist` fact initially to an empty list +- name: Set `nginx__config_files_to_exist` fact initially to an empty list ansible.builtin.set_fact: nginx__config_files_to_exist: [ ] -- name: handle the case, where tls.conf should be deployed +- name: Handle the case, where tls.conf should be deployed when: nginx__deploy_tls_conf block: - - name: make sure tls.conf is deployed + - name: Ensure tls.conf is deployed ansible.builtin.copy: force: true dest: /etc/nginx/conf.d/tls.conf @@ -76,16 +76,16 @@ group: root src: tls.conf become: true - notify: Restart `nginx.service` + notify: Restart nginx - - name: add tls.conf to nginx__config_files_to_exist + - name: Add tls.conf to 
nginx__config_files_to_exist ansible.builtin.set_fact: nginx__config_files_to_exist: "{{ nginx__config_files_to_exist + [ 'tls.conf' ] }}" # noqa: jinja[spacing] -- name: handle the case, where redirect.conf should be deployed +- name: Handle the case, where redirect.conf should be deployed when: nginx__deploy_redirect_conf block: - - name: make sure redirect.conf is deployed + - name: Ensure redirect.conf is deployed ansible.builtin.copy: force: true dest: /etc/nginx/conf.d/redirect.conf @@ -94,16 +94,16 @@ group: root src: redirect.conf become: true - notify: Restart `nginx.service` + notify: Restart nginx - - name: add redirect.conf to nginx__config_files_to_exist + - name: Add redirect.conf to nginx__config_files_to_exist ansible.builtin.set_fact: nginx__config_files_to_exist: "{{ nginx__config_files_to_exist + [ 'redirect.conf' ] }}" # noqa: jinja[spacing] -- name: handle the case, where logging.conf should be deployed +- name: Handle the case, where logging.conf should be deployed when: nginx__deploy_logging_conf block: - - name: make sure logging.conf is deployed + - name: Ensure logging.conf is deployed ansible.builtin.copy: force: true dest: /etc/nginx/conf.d/logging.conf @@ -112,13 +112,13 @@ group: root src: logging.conf become: true - notify: Restart `nginx.service` + notify: Restart nginx - - name: add logging.conf to nginx__config_files_to_exist + - name: Add logging.conf to nginx__config_files_to_exist ansible.builtin.set_fact: nginx__config_files_to_exist: "{{ nginx__config_files_to_exist + [ 'logging.conf' ] }}" # noqa: jinja[spacing] -- name: make sure all given configuration files are deployed +- name: Ensure all given configuration files are deployed ansible.builtin.copy: content: "{{ item.content }}" dest: "/etc/nginx/conf.d/{{ item.name }}.conf" @@ -127,24 +127,24 @@ group: root become: true loop: "{{ nginx__configurations }}" - notify: Restart `nginx.service` + notify: Restart nginx -- name: add names plus suffix from `nginx__configurations` 
to `nginx__config_files_to_exist` fact +- name: Add names with suffixes from `nginx__configurations` to `nginx__config_files_to_exist` fact ansible.builtin.set_fact: nginx__config_files_to_exist: "{{ nginx__config_files_to_exist + [ item.name + '.conf' ] }}" # noqa: jinja[spacing] loop: "{{ nginx__configurations }}" -- name: find configuration files to remove +- name: Find configuration files to remove ansible.builtin.find: paths: /etc/nginx/conf.d/ recurse: false excludes: "{{ nginx__config_files_to_exist }}" register: nginx__config_files_to_remove -- name: remove all configuration file, which should be removed +- name: Remove all configuration file, which should be removed ansible.builtin.file: path: "{{ item.path }}" state: absent become: true loop: "{{ nginx__config_files_to_remove.files }}" - notify: Restart `nginx.service` + notify: Restart nginx diff --git a/roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml b/roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml deleted file mode 100644 index 234b12c..0000000 --- a/roles/nginx/tasks/make_sure_nginx_configuration_names_are_valid.yaml +++ /dev/null @@ -1,7 +0,0 @@ -- name: make sure nginx configuration names are valid - ansible.builtin.fail: - msg: "You used the following name: `{{ item.name }}`. Please make sure to not use the following names: `tls`, `redirect`." 
- when: item.name == "tls" - or item.name == "redirect" - or item.name == "logging" - loop: "{{ nginx__configurations }}" From 34b9ee2fa4c2b19403d180fb44e4a160ced299da Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 04:11:33 +0100 Subject: [PATCH 34/46] nginx(role): simplify description in README --- roles/nginx/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/roles/nginx/README.md b/roles/nginx/README.md index f990c67..94668d2 100644 --- a/roles/nginx/README.md +++ b/roles/nginx/README.md @@ -1,8 +1,6 @@ # Role `nginx` -Makes sure the `nginx` package is installed from the NGINX repos on the specified hosts. -Also makes sure a desirable baseline of NGINX configs is deployed on the specified hosts. -For the NGINX site configurations the config template below can be used. +Ensures nginx is installed from the NGINX repos and setup as specified via the arguments. ## Supported Distributions From 5e5c980f147c855bb9c5e6b78762dcf58c36195b Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 04:28:08 +0100 Subject: [PATCH 35/46] check(playbook): print all held packages --- playbooks/check.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/playbooks/check.yaml b/playbooks/check.yaml index 63ea631..0945944 100644 --- a/playbooks/check.yaml +++ b/playbooks/check.yaml @@ -29,3 +29,14 @@ - name: Print .dpkg-* files list ansible.builtin.debug: var: check__dpkg_files_list + + - name: Get all held packages + ansible.builtin.command: apt-mark showhold + when: ansible_facts['pkg_mgr'] == "apt" + changed_when: false + register: check__apt_mark_showhold + + - name: Print all held packages + ansible.builtin.debug: + var: check__apt_mark_showhold.stdout_lines + when: check__apt_mark_showhold.stdout_lines != [] From 07511ef72358f7ba67ef8b3c046a035d3746de1e Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 04:51:26 +0100 Subject: [PATCH 36/46] grafana(host): remove decommissioned nix-box-june from Prometheus targets
--- resources/chaosknoten/grafana/docker_compose/prometheus.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus.yml b/resources/chaosknoten/grafana/docker_compose/prometheus.yml index 6aeb56e..5f6232f 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus.yml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus.yml @@ -85,7 +85,6 @@ scrape_configs: - forgejo-actions-runner-intern.hamburg.ccc.de:9100 - eh22-netbox-intern.hamburg.ccc.de:9100 - eh22-wiki-intern.hamburg.ccc.de:9100 - - nix-box-june-intern.hamburg.ccc.de:9100 - mjolnir-intern.hamburg.ccc.de:9100 - woodpecker-intern.hamburg.ccc.de:9100 - penpot-intern.hamburg.ccc.de:9100 From d62c070ccca2110b91bd813ae28df24987fa39a1 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 05:17:38 +0100 Subject: [PATCH 37/46] maintenance(playbook): fix playbook by using updated nginx role res. --- playbooks/maintenance.yaml | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/playbooks/maintenance.yaml b/playbooks/maintenance.yaml index 31c817b..8b6d037 100644 --- a/playbooks/maintenance.yaml +++ b/playbooks/maintenance.yaml @@ -1,30 +1,10 @@ -- name: Ensure NGINX repo and install on nginx_hosts - hosts: nginx_hosts +- name: Ensure nginx install on nginx_hosts and nextcloud_hosts + hosts: nginx_hosts:nextcloud_hosts tasks: - - name: make sure NGINX repos are setup + - name: Ensure NGINX is installed ansible.builtin.include_role: name: nginx - tasks_from: main/repo_setup - - - name: make sure NGINX is installed - ansible.builtin.include_role: - name: nginx - tasks_from: main/nginx_install - -- name: Ensure NGINX repo and install on nextcloud_hosts - hosts: nextcloud_hosts:!nginx_hosts - tasks: - - name: make sure NGINX repos are setup - ansible.builtin.include_role: - name: nginx - tasks_from: main/repo_setup - - - name: make sure NGINX is installed - ansible.builtin.include_role: - name: nginx - 
tasks_from: main/nginx_install - vars: - nginx__version_spec: "{{ nextcloud__nginx_version_spec | default('') }}" + tasks_from: main/02_nginx_install.yaml - name: Make Sure System Package Are Up-To-Date for all non-hypervisors hosts: all:!hypervisors From ce812fb00671a24176394b32c296f1809ca17784 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 05:33:30 +0100 Subject: [PATCH 38/46] docker(role): update README Document Debian 12 support, enhance wording, bring structure in line with the READMEs of more modern roles and remove unnecessary sections. --- roles/docker/README.md | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/roles/docker/README.md b/roles/docker/README.md index 2e8a47f..b7f38e1 100644 --- a/roles/docker/README.md +++ b/roles/docker/README.md @@ -1,26 +1,22 @@ # Role `docker` -Makes sure Docker Engine and other related packages are installed from the Docker repos on the specified hosts. -For details see: [`tasks/main/02_docker_install.yaml`](./tasks/main/02_docker_install.yaml). +Ensures the Docker Engine and other related packages are installed from the Docker repos. +For detailed list of packages see: [`tasks/main/02_docker_install.yaml`](./tasks/main/02_docker_install.yaml). ## Supported Distributions The following distributions are supported: - Debian 11 +- Debian 12 ## Required Arguments None. -## Updates +## Optional Arguments -This role doesn't handle updates. -However it uses the system package manager for installing Docker Engine and the other related packages, so when you're making sure the system packages are up-to-date, you're handling updates for the packages installed by this role as well. - -## `hosts` - -The `hosts` for this role need to be the machines for which you want to make sure Docker Engine and other related packages are installed from the Docker repos. +None. 
## Links & Resources From 89f3e55eacfa9620332ec8fbf3c79036928a7fd5 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 05:35:45 +0100 Subject: [PATCH 39/46] docker(role): use better naming --- roles/docker/tasks/main.yaml | 6 +++--- roles/docker/tasks/main/01_repo_setup.yaml | 4 ++-- roles/docker/tasks/main/02_docker_install.yaml | 2 +- roles/docker/tasks/main/03_docker_config.yaml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/roles/docker/tasks/main.yaml b/roles/docker/tasks/main.yaml index aa01005..8c8044d 100644 --- a/roles/docker/tasks/main.yaml +++ b/roles/docker/tasks/main.yaml @@ -1,11 +1,11 @@ -- name: make sure the Docker repo is setup +- name: Ensure Docker repo is setup ansible.builtin.import_tasks: file: main/01_repo_setup.yaml -- name: make sure Docker Engine and other related packages are installed +- name: Ensure Docker Engine and other related packages are installed ansible.builtin.import_tasks: file: main/02_docker_install.yaml -- name: configure the Docker daemon +- name: Ensure Docker daemon configuration ansible.builtin.import_tasks: file: main/03_docker_config.yaml diff --git a/roles/docker/tasks/main/01_repo_setup.yaml b/roles/docker/tasks/main/01_repo_setup.yaml index 24d3dcf..aa77521 100644 --- a/roles/docker/tasks/main/01_repo_setup.yaml +++ b/roles/docker/tasks/main/01_repo_setup.yaml @@ -1,4 +1,4 @@ -- name: make sure Dockers GPG key is added +- name: Ensure Dockers GPG key is added ansible.builtin.get_url: url: https://download.docker.com/linux/debian/gpg dest: /etc/apt/trusted.gpg.d/docker.asc @@ -7,7 +7,7 @@ group: root become: true -- name: make sure Dockers APT repository is added +- name: Ensure Docker APT repository is added ansible.builtin.apt_repository: repo: "deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/docker.asc] https://download.docker.com/linux/debian {{ ansible_distribution_release }} stable" filename: docker diff --git a/roles/docker/tasks/main/02_docker_install.yaml 
b/roles/docker/tasks/main/02_docker_install.yaml index 5617195..f2ae880 100644 --- a/roles/docker/tasks/main/02_docker_install.yaml +++ b/roles/docker/tasks/main/02_docker_install.yaml @@ -1,4 +1,4 @@ -- name: make sure Docker Engine and other related packages are installed +- name: Ensure Docker Engine and other related packages are installed ansible.builtin.apt: name: - docker-ce diff --git a/roles/docker/tasks/main/03_docker_config.yaml b/roles/docker/tasks/main/03_docker_config.yaml index 0f26500..639e8fa 100644 --- a/roles/docker/tasks/main/03_docker_config.yaml +++ b/roles/docker/tasks/main/03_docker_config.yaml @@ -1,7 +1,7 @@ -# Configure the Docker daemon with the following settings: +# Ensure the Docker daemon is configured with the following settings: # - log to systemd journal # https://docs.docker.com/engine/logging/drivers/journald/ -- name: configure Docker daemon +- name: Ensure Docker daemon configuration ansible.builtin.copy: src: daemon.json dest: /etc/docker/daemon.json From 7420ed60105dbcf5b5c76c90279c4c8b3889bd98 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 05:43:39 +0100 Subject: [PATCH 40/46] nginx(role): split up repo setup and install task lists to estab. conv. Split up repo setup and package installation after all to establish this as a convention (it's already done this way in the docker role and was done this way in the nginx role before) to highlight that an external repo is used.
--- playbooks/maintenance.yaml | 11 ++++++++--- roles/nginx/tasks/main.yaml | 8 ++++++-- .../{02_nginx_install.yaml => 02_repo_setup.yaml} | 7 ------- roles/nginx/tasks/main/03_nginx_install.yaml | 6 ++++++ .../{03_config_deploy.yaml => 04_config_deploy.yaml} | 0 5 files changed, 20 insertions(+), 12 deletions(-) rename roles/nginx/tasks/main/{02_nginx_install.yaml => 02_repo_setup.yaml} (90%) create mode 100644 roles/nginx/tasks/main/03_nginx_install.yaml rename roles/nginx/tasks/main/{03_config_deploy.yaml => 04_config_deploy.yaml} (100%) diff --git a/playbooks/maintenance.yaml b/playbooks/maintenance.yaml index 8b6d037..40ca666 100644 --- a/playbooks/maintenance.yaml +++ b/playbooks/maintenance.yaml @@ -1,10 +1,15 @@ -- name: Ensure nginx install on nginx_hosts and nextcloud_hosts +- name: Ensure NGINX repo setup and nginx install on nginx_hosts and nextcloud_hosts hosts: nginx_hosts:nextcloud_hosts tasks: - - name: Ensure NGINX is installed + - name: Ensure NGINX repo is setup ansible.builtin.include_role: name: nginx - tasks_from: main/02_nginx_install.yaml + tasks_from: main/02_repo_setup.yaml + + - name: Ensure nginx is installed + ansible.builtin.include_role: + name: nginx + tasks_from: main/03_nginx_install.yaml - name: Make Sure System Package Are Up-To-Date for all non-hypervisors hosts: all:!hypervisors diff --git a/roles/nginx/tasks/main.yaml b/roles/nginx/tasks/main.yaml index 4a86530..0cade0b 100644 --- a/roles/nginx/tasks/main.yaml +++ b/roles/nginx/tasks/main.yaml @@ -2,10 +2,14 @@ ansible.builtin.import_tasks: file: main/01_validate_config_names.yaml +- name: Ensure NGINX repo is set up + ansible.builtin.import_tasks: + file: main/02_repo_setup.yaml + - name: Ensure nginx is installed ansible.builtin.import_tasks: - file: main/02_nginx_install.yaml + file: main/03_nginx_install.yaml - name: Ensure configuration deployment ansible.builtin.import_tasks: - file: main/03_config_deploy.yaml + file: main/04_config_deploy.yaml diff --git 
a/roles/nginx/tasks/main/02_nginx_install.yaml b/roles/nginx/tasks/main/02_repo_setup.yaml similarity index 90% rename from roles/nginx/tasks/main/02_nginx_install.yaml rename to roles/nginx/tasks/main/02_repo_setup.yaml index 9ceb323..eaaec30 100644 --- a/roles/nginx/tasks/main/02_nginx_install.yaml +++ b/roles/nginx/tasks/main/02_repo_setup.yaml @@ -37,10 +37,3 @@ group: root mode: "0644" become: true - -- name: Ensure nginx is installed - ansible.builtin.apt: - name: nginx - state: present - update_cache: true - become: true diff --git a/roles/nginx/tasks/main/03_nginx_install.yaml b/roles/nginx/tasks/main/03_nginx_install.yaml new file mode 100644 index 0000000..86df2e1 --- /dev/null +++ b/roles/nginx/tasks/main/03_nginx_install.yaml @@ -0,0 +1,6 @@ +- name: Ensure nginx is installed + ansible.builtin.apt: + name: nginx + state: present + update_cache: true + become: true diff --git a/roles/nginx/tasks/main/03_config_deploy.yaml b/roles/nginx/tasks/main/04_config_deploy.yaml similarity index 100% rename from roles/nginx/tasks/main/03_config_deploy.yaml rename to roles/nginx/tasks/main/04_config_deploy.yaml From 1ea63a19d38ff1285aceeeccf3c012a4e57ad3c2 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 05:48:01 +0100 Subject: [PATCH 41/46] maintenance(playbook): ensure docker repo and install before update As with ensuring the nginx repo setup and install on relevant hosts, do the same for docker. 
--- playbooks/maintenance.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/playbooks/maintenance.yaml b/playbooks/maintenance.yaml index 40ca666..5cf70c5 100644 --- a/playbooks/maintenance.yaml +++ b/playbooks/maintenance.yaml @@ -11,6 +11,19 @@ name: nginx tasks_from: main/03_nginx_install.yaml +- name: Ensure Docker repo setup and package install on relevant hosts + hosts: docker_compose_hosts:nextcloud_hosts + tasks: + - name: Ensure Docker repo is setup + ansible.builtin.include_role: + name: docker + tasks_from: main/01_repo_setup.yaml + + - name: Ensure Docker Engine and other related packages are installed + ansible.builtin.include_role: + name: docker + tasks_from: main/02_docker_install.yaml + - name: Make Sure System Package Are Up-To-Date for all non-hypervisors hosts: all:!hypervisors roles: From 9dba002f10cbf5d6a0c3bcc5ae196ed1b9bff4ee Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 05:48:52 +0100 Subject: [PATCH 42/46] maintenance(playbook): simplify nginx repo setup and install task name --- playbooks/maintenance.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playbooks/maintenance.yaml b/playbooks/maintenance.yaml index 5cf70c5..c912a53 100644 --- a/playbooks/maintenance.yaml +++ b/playbooks/maintenance.yaml @@ -1,4 +1,4 @@ -- name: Ensure NGINX repo setup and nginx install on nginx_hosts and nextcloud_hosts +- name: Ensure NGINX repo setup and nginx install on relevant hosts hosts: nginx_hosts:nextcloud_hosts tasks: - name: Ensure NGINX repo is setup From 0e4df5b590b74f44485ed9cbc20c3c5d4add8e30 Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 06:07:47 +0100 Subject: [PATCH 43/46] nginx(role): make loop output manageable using loop_control label --- roles/nginx/tasks/main/01_validate_config_names.yaml | 2 ++ roles/nginx/tasks/main/04_config_deploy.yaml | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/roles/nginx/tasks/main/01_validate_config_names.yaml 
b/roles/nginx/tasks/main/01_validate_config_names.yaml index 7991b89..b2a4a1c 100644 --- a/roles/nginx/tasks/main/01_validate_config_names.yaml +++ b/roles/nginx/tasks/main/01_validate_config_names.yaml @@ -5,3 +5,5 @@ or item.name == "redirect" or item.name == "logging" loop: "{{ nginx__configurations }}" + loop_control: + label: "{{ item.name }}" diff --git a/roles/nginx/tasks/main/04_config_deploy.yaml b/roles/nginx/tasks/main/04_config_deploy.yaml index 2f0c834..38dbfc1 100644 --- a/roles/nginx/tasks/main/04_config_deploy.yaml +++ b/roles/nginx/tasks/main/04_config_deploy.yaml @@ -127,12 +127,16 @@ group: root become: true loop: "{{ nginx__configurations }}" + loop_control: + label: "{{ item.name }}" notify: Restart nginx - name: Add names with suffixes from `nginx__configurations` to `nginx__config_files_to_exist` fact ansible.builtin.set_fact: nginx__config_files_to_exist: "{{ nginx__config_files_to_exist + [ item.name + '.conf' ] }}" # noqa: jinja[spacing] loop: "{{ nginx__configurations }}" + loop_control: + label: "{{ item.name }}" - name: Find configuration files to remove ansible.builtin.find: @@ -147,4 +151,6 @@ state: absent become: true loop: "{{ nginx__config_files_to_remove.files }}" + loop_control: + label: "{{ item.path | ansible.builtin.basename }}" notify: Restart nginx From fce4c2f73b49ab34f3061edd91a7e3c569b7823e Mon Sep 17 00:00:00 2001 From: June Date: Tue, 18 Feb 2025 15:38:07 +0100 Subject: [PATCH 44/46] grafana(host): account in Prom. hyperv. disk alerts for longer backups Set duration for Prometheus hypervisor disk rw rate and hard disk io alerts to 2h to account for the very long running (over 90m) backup job. 
--- .../grafana/docker_compose/prometheus_alerts.rules.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml index f684385..5ec53b8 100644 --- a/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml +++ b/resources/chaosknoten/grafana/docker_compose/prometheus_alerts.rules.yaml @@ -166,7 +166,7 @@ groups: # Longer intervals to account for disk intensive hypervisor tasks (backups, moving VMs, etc.). - alert: HypervisorHostUnusualDiskReadRate expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 90m + for: 2h labels: severity: warning annotations: @@ -174,7 +174,7 @@ groups: description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}" - alert: HypervisorHostUnusualDiskWriteRate expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 90m + for: 2h labels: severity: warning annotations: @@ -256,7 +256,7 @@ groups: # Since hard disks on the hypervisor can easily have their IO saturated by hypervisor tasks (backups, moving VMs, etc.), alert when the IO is above the regular threshold for a very long time. 
- alert: HypervisorHostUnusualHardDiskIo expr: (rate(node_disk_io_time_seconds_total{device=~"s.+"}[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename="chaosknoten"} - for: 90m + for: 2h labels: severity: warning annotations: From 2b0d99eb23fa70d671242069bdea4773f82f9911 Mon Sep 17 00:00:00 2001 From: c6ristian Date: Sat, 22 Feb 2025 22:56:40 +0100 Subject: [PATCH 45/46] keycloak(host): update to 26.1 & postgres to 15.12 --- resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 b/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 index 9509654..231f581 100644 --- a/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 +++ b/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 @@ -22,7 +22,7 @@ services: keycloak: - image: git.hamburg.ccc.de/ccchh/oci-images/keycloak:26.0 + image: git.hamburg.ccc.de/ccchh/oci-images/keycloak:26.1 pull_policy: always restart: unless-stopped command: start --optimized @@ -46,7 +46,7 @@ services: - "8080:8080" db: - image: postgres:15.2 + image: postgres:15.12 restart: unless-stopped networks: - keycloak From 98972e39c4e7ce319aadef0e8758fb35bcbef138 Mon Sep 17 00:00:00 2001 From: c6ristian Date: Sat, 22 Feb 2025 22:56:40 +0100 Subject: [PATCH 46/46] keycloak(host): update to 26.1 & postgres to 15.12 --- resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 b/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 index 9509654..231f581 100644 --- a/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 +++ b/resources/chaosknoten/keycloak/docker_compose/compose.yaml.j2 @@ -22,7 +22,7 @@ services: keycloak: - image: git.hamburg.ccc.de/ccchh/oci-images/keycloak:26.0 + image: 
git.hamburg.ccc.de/ccchh/oci-images/keycloak:26.1 pull_policy: always restart: unless-stopped command: start --optimized @@ -46,7 +46,7 @@ services: - "8080:8080" db: - image: postgres:15.2 + image: postgres:15.12 restart: unless-stopped networks: - keycloak