forked from kamba4/sunders
added changes to improve performance of sync
All checks were successful
Build (and tag) Images / build (push) Successful in 1m4s
All checks were successful
Build (and tag) Images / build (push) Successful in 1m4s
This commit is contained in:
parent
8612798373
commit
c768645960
5 changed files with 101 additions and 7 deletions
|
|
@ -22,11 +22,11 @@ mkdir -p logs
|
|||
seq "$((lastSeqNum + 1))" "$newSeqNum" > seq_list.txt
|
||||
|
||||
|
||||
cat seq_list.txt | xargs -n1 -P7 -I{} bash -c '
|
||||
cat seq_list.txt | xargs -n1 -P10 -I{} bash -c '
|
||||
seqnum="$1"
|
||||
f=$(printf "%09d" "$seqnum")
|
||||
path=$(echo "$f" | sed -E "s#(...)(...)(...)#\1/\2/\3#")
|
||||
url="${REPLICATE_URL}/${path}.osc.gz"
|
||||
echo "Downloading $url"
|
||||
curl -sL "$url" -o "${seqnum}.osc.gz" && gunzip "${seqnum}.osc.gz"
|
||||
curl -sL "$url" -o "${seqnum}.osc.gz" && gunzip "${seqnum}.osc.gz" && python filter_osc.py ${seqnum} && rm ${seqnum}.osc
|
||||
' _ {}
|
||||
|
|
|
|||
|
|
@ -50,21 +50,21 @@ curSeqNum=$lastSeqNum
|
|||
curSeqNum=$(( $curSeqNum + 1 ))
|
||||
while [ $curSeqNum -lt $newSeqNum ]
|
||||
do
|
||||
if [ -e "$curSeqNum.osc" ]
|
||||
if [ -e "${curSeqNum}_filtered.osc" ]
|
||||
then
|
||||
|
||||
|
||||
rm "change_file.osc"
|
||||
cp $curSeqNum.osc change_file.osc
|
||||
cp ${curSeqNum}_filtered.osc change_file.osc
|
||||
|
||||
php create_camera_update_statements.php $curSeqNum
|
||||
rm $curSeqNum.osc
|
||||
rm ${curSeqNum}_filtered.osc
|
||||
curSeqNum=$(( $curSeqNum + 1 ))
|
||||
else
|
||||
path=$(echo "$(printf "%09d" "$curSeqNum")" | sed -E "s#(...)(...)(...)#\1/\2/\3#")
|
||||
url="${REPLICATE_URL}/${path}.osc.gz"
|
||||
echo $url
|
||||
curl -sL "$url" -o "$curSeqNum.osc.gz" && gunzip "$curSeqNum.osc.gz"
|
||||
curl -sL "$url" -o "$curSeqNum.osc.gz" && gunzip "$curSeqNum.osc.gz" && python filter_osc.py ${curSeqNum} && rm ${curSeqNum}.osc
|
||||
fi
|
||||
done
|
||||
|
||||
|
|
|
|||
90
data_handler/utils/filter_osc.py
Normal file
90
data_handler/utils/filter_osc.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
import sys
|
||||
import gzip
|
||||
from lxml import etree
|
||||
|
||||
def open_maybe_gzip(filename, mode="rb"):
    """Return an open handle for *filename*, using gzip when it ends in ``.gz``.

    Only the file name suffix is examined; the file contents are not
    inspected. ``mode`` is forwarded unchanged to ``gzip.open`` or to the
    builtin ``open``.
    """
    opener = gzip.open if filename.endswith(".gz") else open
    return opener(filename, mode)
|
||||
|
||||
def _write_section(out, tag, nodes):
    """Write *nodes* to *out*, wrapped in an opening and closing *tag* line."""
    out.write(f" <{tag}>\n".encode())
    for node in nodes:
        out.write(etree.tostring(node, encoding="utf-8"))
        out.write(b"\n")
    out.write(f" </{tag}>\n".encode())


def process_file(number):
    """Split ``{number}.osc`` into a camera-only change file and a delete file.

    Reads ``{number}.osc`` from the current directory and writes two
    osmChange documents:

    * ``{number}_filtered.osc`` -- nodes from ``create``/``modify`` sections
      whose ``surveillance:type`` tag equals ``camera``.
    * ``/migrations/{number}_deleted.osc`` -- every node found in a
      ``delete`` section.

    Only ``node`` elements are copied; other element types are dropped.
    Sections that end up with no nodes are not written at all.

    Args:
        number: Replication sequence number used to build the file names.
    """
    input_file = f"{number}.osc"
    filtered_file = f"{number}_filtered.osc"
    deleted_file = f"/migrations/{number}_deleted.osc"

    # --- open input and outputs ---
    with open_maybe_gzip(input_file, "rb") as infile, \
            open(filtered_file, "wb") as out_filt, \
            open(deleted_file, "wb") as out_del:

        # Both outputs start with the same XML prolog and root element.
        for out in (out_filt, out_del):
            out.write(b"<?xml version='1.0' encoding='UTF-8'?>\n")
            out.write(b"<osmChange version='0.6' generator='filter-script'>\n")

        context = etree.iterparse(infile, events=("start", "end"))
        current_section = None  # "create", "modify", "delete" or None
        buffer_filt = None      # nodes kept for the current create/modify
        buffer_del = None       # nodes kept for the current delete

        for event, elem in context:
            if event == "start" and elem.tag in ("create", "modify", "delete"):
                # Entering a section: start a fresh buffer for it.
                current_section = elem.tag
                if current_section == "delete":
                    buffer_del = etree.Element("delete")
                else:
                    buffer_filt = etree.Element(current_section)

            elif event == "end" and elem.tag == "node" and current_section:
                if current_section in ("create", "modify"):
                    tags = {t.get("k"): t.get("v") for t in elem.findall("tag")}
                    if tags.get("surveillance:type") == "camera":
                        # Copy only nodes that pass the filter; the
                        # serialize/parse round trip is wasted work for
                        # every node that is discarded anyway.
                        buffer_filt.append(
                            etree.fromstring(etree.tostring(elem))
                        )

                elif current_section == "delete":
                    buffer_del.append(etree.fromstring(etree.tostring(elem)))

                # The copy (if any) is independent, so the parsed node can
                # be emptied right away.
                elem.clear()

            elif event == "end" and elem.tag in ("create", "modify"):
                # Leaving a create/modify section: flush it if non-empty.
                if buffer_filt is not None and len(buffer_filt):
                    _write_section(out_filt, current_section, buffer_filt)
                buffer_filt = None
                current_section = None
                elem.clear()

            elif event == "end" and elem.tag == "delete":
                # Leaving a delete section: flush it if non-empty.
                if buffer_del is not None and len(buffer_del):
                    _write_section(out_del, "delete", buffer_del)
                buffer_del = None
                current_section = None
                elem.clear()

        out_filt.write(b"</osmChange>\n")
        out_del.write(b"</osmChange>\n")

    print(f"✅ Processed {input_file}")
    print(f" → {filtered_file} (camera nodes)")
    print(f" → {deleted_file} (delete nodes)")
|
||||
|
||||
if __name__ == "__main__":
    # Exactly one argument is expected: the replication sequence number,
    # which process_file() uses to derive the input and output file names.
    if len(sys.argv) != 2:
        # The usage text names this script (filter_osc.py), which is how
        # the sync script invokes it.
        print("Usage: python filter_osc.py <replication_number>")
        print("Example: python filter_osc.py 123")
        sys.exit(1)

    number = sys.argv[1]
    process_file(number)
|
||||
Loading…
Add table
Add a link
Reference in a new issue