easterhegg-eh22-website/fetch.sh
2026-01-03 00:14:19 +01:00

78 lines
2.2 KiB
Bash
Executable file

#!/bin/bash
set -euo pipefail

# Host to mirror. The cleanup step further down cd's into a directory of the
# same name, which is where the mirrored files are expected.
domain=eh22.easterhegg.eu

## fetch pages
# Recursive download limited to $domain, starting at /start. Page requisites
# are fetched as well, extensions are adjusted and links are converted.
wget_args=(
  -r
  "--domains=$domain"
  --adjust-extension
  --page-requisites
  --convert-links
  "https://$domain/start"
)
# wget's exit status is deliberately ignored: a failing download must not
# abort the script even though errexit is active.
wget "${wget_args[@]}" || true
## cleanup
echo "-- cleaning up the HTML"
cd "$domain"

## remove the "intern" start page
# NOTE(review): presumably the wiki's internal (non-public) namespace — confirm.
rm -f -- intern:start.html

## fix wget breaking internal anchor links
# Links of the form "<this page's file name>#anchor" are rewritten to plain
# "#anchor" inside that page.
for page in *.html; do
  # An unmatched glob is left as the literal pattern; skip it instead of
  # handing sed a file that does not exist.
  [[ -e "$page" ]] || continue

  # The file name goes into a sed regex, so escape BRE metacharacters and the
  # "/" delimiter. Otherwise "." would match any character and a name
  # containing "[" or "/" would break the expression.
  page_re=$(printf '%s\n' "$page" | sed 's/[][\.*^$/]/\\&/g')

  # Write to a sibling temp file and only replace the page if sed succeeded.
  sed -e "s/${page_re}#/#/g" -- "$page" > "$page.patched" \
    && mv -- "$page.patched" "$page"
done
## cleanup dokuwiki UI: remove "Website-Werkzeuge", "Benutzer-Werkzeuge" and login link
#######################################
# Delete every range of lines that runs from a line matching START through
# the next line matching END (both inclusive), rewriting the file in place.
# If no END line follows a START line, everything up to the end of the file
# is dropped (awk range-pattern semantics).
# Arguments:
#   $1 - path of the file to rewrite
#   $2 - awk regex body marking the first line of a range
#   $3 - awk regex body marking the last line of a range
# Both patterns are pasted verbatim between /.../ in the awk program, so a
# literal "/" must be passed as "\/". Only pass trusted patterns.
# Returns:
#   non-zero if awk or the final mv fails; the original file is then left
#   untouched.
#######################################
purge_start_end() {
  local file=$1
  local start=$2
  local end=$3

  # The file is fed via stdin and every expansion is quoted, so names with
  # whitespace, glob characters or "=" are handled as plain paths.
  awk "/$start/,/$end/ { next } 1" < "$file" > "$file.patched" \
    && mv -- "$file.patched" "$file"
}
#######################################
# Delete every line that matches a pattern, rewriting the file in place.
# Arguments:
#   $1 - path of the file to rewrite
#   $2 - awk regex body; lines matching it are removed
# The pattern is pasted verbatim between /.../ in the awk program, so a
# literal "/" must be passed as "\/". Only pass trusted patterns.
# Returns:
#   non-zero if awk or the final mv fails; the original file is then left
#   untouched.
#######################################
purge_line() {
  local file=$1
  local line=$2

  # "$0 !~ /re/" prints exactly the lines that do not match, same as the
  # shorter "!/re/", but keeps "!" out of the double-quoted string. The file
  # is fed via stdin and quoted so unusual file names are handled as paths.
  awk "\$0 !~ /$line/" < "$file" > "$file.patched" \
    && mv -- "$file.patched" "$file"
}
## strip the DokuWiki UI from the top-level pages and the _detail pages
for page in *.html _detail/*.html; do
  # An unmatched glob is left as the literal pattern. Skip it, otherwise awk
  # fails on a non-existent file and the script aborts under errexit.
  [[ -e "$page" ]] || continue

  ## -> login link in header
  purge_start_end "$page" '<nav id="dokuwiki__usertools" class="nav-usertools ">' '<\/nav>'
  purge_line "$page" '<a rel="nofollow" href="#dokuwiki__usertools">Benutzer-Werkzeuge<\/a><span class="sr-out"> \/<\/span>'
  ## -> website and user tools in sidebar
  purge_start_end "$page" '<nav class="nav-usermenu">' '<\/nav>'
  purge_start_end "$page" '<div class="nav"><a href="#sidebar-site-tools" role="heading" aria-level="2">' '<\/div>'
  purge_start_end "$page" '<div class="nav-panel level1">' '<\/div>'
done

## drop every line that references taskrunner.php
for page in *.html _detail/*.html _export/xhtml/*.html; do
  # Same unmatched-glob guard as in the loop before.
  [[ -e "$page" ]] || continue
  purge_line "$page" 'taskrunner.php'
done
# Named explicitly rather than globbed, so a missing file still fails loudly.
purge_line lib/tpl/base.html 'taskrunner.php'

## cleanup downloaded get queries
# Removes pages whose file name contains a literal "?".
rm -f -- *\?*.html
rm -f -- lib/exe/taskrunner.php*
## TODO: Add svg.php from inside CSS