diff --git a/data_handler/Containerfile b/data_handler/Containerfile
index f9a3f6c..5cdf7b9 100644
--- a/data_handler/Containerfile
+++ b/data_handler/Containerfile
@@ -1,5 +1,5 @@
FROM ghcr.io/asohh/fedora-minimal-containers/php-cli:43
-RUN microdnf install -y php-mysqli php-bcmath php-xml gunzip https://raw.githubusercontent.com/rpmsphere/x86_64/master/d/dcron-4.5-7.1.x86_64.rpm && microdnf clean all
+RUN microdnf install -y php-mysqli php-bcmath php-xml python-lxml gunzip https://raw.githubusercontent.com/rpmsphere/x86_64/master/d/dcron-4.5-7.1.x86_64.rpm && microdnf clean all
COPY ./utils/* /opt/
COPY ./data_init/*.sql /opt/init/init.sql
diff --git a/data_handler/utils/create_camera_update_statements_download.sh b/data_handler/utils/create_camera_update_statements_download.sh
index 17ccdbb..b55ad3b 100644
--- a/data_handler/utils/create_camera_update_statements_download.sh
+++ b/data_handler/utils/create_camera_update_statements_download.sh
@@ -22,11 +22,11 @@ mkdir -p logs
seq "$((lastSeqNum + 1))" "$newSeqNum" > seq_list.txt
-cat seq_list.txt | xargs -n1 -P7 -I{} bash -c '
+cat seq_list.txt | xargs -n1 -P10 -I{} bash -c '
seqnum="$1"
f=$(printf "%09d" "$seqnum")
path=$(echo "$f" | sed -E "s#(...)(...)(...)#\1/\2/\3#")
url="${REPLICATE_URL}/${path}.osc.gz"
echo "Downloading $url"
- curl -sL "$url" -o "${seqnum}.osc.gz" && gunzip "${seqnum}.osc.gz"
+ curl -sL "$url" -o "${seqnum}.osc.gz" && gunzip "${seqnum}.osc.gz" && python filter_osc.py ${seqnum} && rm ${seqnum}.osc
' _ {}
diff --git a/data_handler/utils/create_camera_update_statements_process.sh b/data_handler/utils/create_camera_update_statements_process.sh
index 87267e9..4744dff 100644
--- a/data_handler/utils/create_camera_update_statements_process.sh
+++ b/data_handler/utils/create_camera_update_statements_process.sh
@@ -50,21 +50,21 @@ curSeqNum=$lastSeqNum
curSeqNum=$(( $curSeqNum + 1 ))
while [ $curSeqNum -lt $newSeqNum ]
do
- if [ -e "$curSeqNum.osc" ]
+ if [ -e "${curSeqNum}_filtered.osc" ]
then
rm "change_file.osc"
- cp $curSeqNum.osc change_file.osc
+ cp ${curSeqNum}_filtered.osc change_file.osc
php create_camera_update_statements.php $curSeqNum
- rm $curSeqNum.osc
+ rm ${curSeqNum}_filtered.osc
curSeqNum=$(( $curSeqNum + 1 ))
else
path=$(echo "$(printf "%09d" "$curSeqNum")" | sed -E "s#(...)(...)(...)#\1/\2/\3#")
url="${REPLICATE_URL}/${path}.osc.gz"
echo $url
- curl -sL "$url" -o "$curSeqNum.osc.gz" && gunzip "$curSeqNum.osc.gz"
+ curl -sL "$url" -o "$curSeqNum.osc.gz" && gunzip "$curSeqNum.osc.gz" && python filter_osc.py ${curSeqNum} && rm ${curSeqNum}.osc
fi
done
diff --git a/data_handler/utils/filter_osc.py b/data_handler/utils/filter_osc.py
new file mode 100644
index 0000000..0baea64
--- /dev/null
+++ b/data_handler/utils/filter_osc.py
@@ -0,0 +1,90 @@
+import sys
+import gzip
+from lxml import etree
+
+def open_maybe_gzip(filename, mode="rb"):
+ """Open file normally or as gzip based on extension."""
+ if filename.endswith(".gz"):
+ return gzip.open(filename, mode)
+ return open(filename, mode)
+
+def process_file(number):
+ input_file = f"{number}.osc"
+ filtered_file = f"{number}_filtered.osc"
+ deleted_file = f"/migrations/{number}_deleted.osc"
+
+ # --- open input and outputs ---
+ with open_maybe_gzip(input_file, "rb") as infile, \
+ open(filtered_file, "wb") as out_filt, \
+ open(deleted_file, "wb") as out_del:
+
+ # XML headers
+ out_filt.write(b"\n")
+ out_filt.write(b"\n")
+
+ out_del.write(b"\n")
+ out_del.write(b"\n")
+
+ context = etree.iterparse(infile, events=("start", "end"))
+ current_section = None
+ buffer_filt = None
+ buffer_del = None
+
+ for event, elem in context:
+ if event == "start" and elem.tag in ("create", "modify", "delete"):
+ current_section = elem.tag
+ if current_section == "delete":
+ buffer_del = etree.Element("delete")
+ else:
+ buffer_filt = etree.Element(current_section)
+
+ elif event == "end" and elem.tag == "node" and current_section:
+ node_copy = etree.fromstring(etree.tostring(elem))
+
+ if current_section in ("create", "modify"):
+ tags = {t.get("k"): t.get("v") for t in elem.findall("tag")}
+ if tags.get("surveillance:type") == "camera":
+ buffer_filt.append(node_copy)
+
+ elif current_section == "delete":
+ buffer_del.append(node_copy)
+
+ elem.clear()
+
+ elif event == "end" and elem.tag in ("create", "modify"):
+ if buffer_filt is not None and len(buffer_filt):
+ out_filt.write(f" <{current_section}>\n".encode())
+ for node in buffer_filt:
+ out_filt.write(etree.tostring(node, encoding="utf-8"))
+ out_filt.write(b"\n")
+ out_filt.write(f" {current_section}>\n".encode())
+ buffer_filt = None
+ current_section = None
+ elem.clear()
+
+ elif event == "end" and elem.tag == "delete":
+ if buffer_del is not None and len(buffer_del):
+ out_del.write(b" \n")
+ for node in buffer_del:
+ out_del.write(etree.tostring(node, encoding="utf-8"))
+ out_del.write(b"\n")
+ out_del.write(b" \n")
+ buffer_del = None
+ current_section = None
+ elem.clear()
+
+ out_filt.write(b"\n")
+ out_del.write(b"\n")
+
+ print(f"✅ Processed {input_file}")
+ print(f" → {filtered_file} (camera nodes)")
+ print(f" → {deleted_file} (delete nodes)")
+
+if __name__ == "__main__":
+ if len(sys.argv) != 2:
+ print("Usage: python filter_replication.py ")
+ print("Example: python filter_replication.py 123")
+ sys.exit(1)
+
+ number = sys.argv[1]
+ process_file(number)
diff --git a/docker-compose.yml b/docker-compose.yml
index 1d60cac..f64f158 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,6 +8,8 @@ services:
MYSQL_PASSWORD: camerapassword # ${{secrets.MYSQL_PASSWORD}}
ports:
- "3306:3306"
+ volumes:
+ - ./mariadb:/var/lib/mysql:Z
healthcheck:
test: ["CMD", "mariadb-admin", "ping", "-h", "localhost", "-uroot", "-prootpassword"]
interval: 10s
@@ -17,6 +19,7 @@ services:
web:
image: git.hamburg.ccc.de/ccchh/sunders/web:latest
+ build: ./web/
environment:
MYSQL_HOST: db
MYSQL_DB: camera # ${{secrets.MYSQL_DATABASE}}
@@ -34,6 +37,7 @@ services:
data_handler:
image: git.hamburg.ccc.de/ccchh/sunders/data_handler:latest
+ build: ./data_handler/
environment:
MYSQL_HOST: db
MYSQL_DB: camera # ${{secrets.MYSQL_DATABASE}}