#!/bin/bash
#
# Mirror the DokuWiki site at $domain with wget, then clean up the
# downloaded HTML inside the resulting "$domain" directory.

set -euo pipefail

domain=eh22.easterhegg.eu

## fetch pages
# A recursive wget run exits non-zero as soon as any single request fails,
# so a failure here is reported but deliberately does not abort the script.
wget -r --domains="$domain" --adjust-extension --page-requisites \
  --convert-links "https://$domain/start" \
  || printf 'warning: wget exited with status %s\n' "$?" >&2

## cleanup
printf '%s\n' "-- cleaning up the HTML"
cd "$domain" || {
  printf 'error: cannot enter download directory %s\n' "$domain" >&2
  exit 1
}

## remove intern
rm -f -- intern:start.html

## fix wget breaking internal anchor links

# Print $1 with every character that is special in a basic regular
# expression (or is the "/" delimiter of an s/// command) backslash-escaped,
# so the result matches $1 literally when embedded in a sed pattern.
escape_sed_regex() {
  printf '%s' "$1" | sed 's|[][\\.*^$/]|\\&|g'
}

# Rewrite "<file>#anchor" references inside <file> itself to plain "#anchor".
# Arguments:
#   $1 - HTML file to patch in place (via "$1.patched")
fix_anchor_links() {
  local file=$1
  local pattern
  pattern=$(escape_sed_regex "$file") || return
  sed "s/${pattern}#/#/g" < "$file" > "$file.patched" \
    && mv -- "$file.patched" "$file"
}

for page in *.html; do
  # An unmatched glob stays as the literal string "*.html"; skip it.
  [[ -e "$page" ]] || continue
  fix_anchor_links "$page"
done

## cleanup dokuwiki UI: remove "Website-Werkzeuge", "Benutzer-Werkzeuge" and login link

# Drop every line range that starts at a line matching one pattern and ends
# at a line matching another (both lines included).
# Arguments:
#   $1 - file to rewrite in place (via "$1.patched")
#   $2 - awk regex selecting the first line of a range
#   $3 - awk regex selecting the last line of a range
# Both patterns are pasted into /.../ regex literals of the awk program, so
# a literal "/" inside them has to be written as "\/" by the caller.
purge_start_end() {
  local file=$1
  local start=$2
  local end=$3
  awk "/$start/,/$end/ { next } 1" < "$file" > "$file.patched" \
    && mv -- "$file.patched" "$file"
}

# Drop every line that matches a pattern.
# Arguments:
#   $1 - file to rewrite in place (via "$1.patched")
#   $2 - awk regex; pasted into a /.../ literal, so "/" must be given as "\/"
purge_line() {
  local file=$1
  local line=$2
  awk "!/$line/" < "$file" > "$file.patched" \
    && mv -- "$file.patched" "$file"
}

## NOTE(review): the UI cleanup loop is cut off in the reviewed source -- it
## ends inside an opening single quote. It is kept verbatim below as a
## comment instead of guessing the missing patterns; restore it from the
## original script before relying on the DokuWiki UI cleanup.
# for page in *.html; do
#   ## -> login link in header
#   purge_start_end $page '