#!/bin/bash
#
# Downloads and filters every replication diff published since the last
# processed sequence number.
#
# Steps:
#   1. Read REPLICATE_URL from config.php (in the script's directory).
#   2. Ask get_sync_state.php for the last processed sequence number.
#   3. Fetch "$REPLICATE_URL/state.txt" and read its "sequenceNumber=" line.
#   4. For every sequence number in (last, new], in up to 10 parallel
#      workers: download NNN/NNN/NNN.osc.gz, gunzip it, run
#      filter_osc.py on it and delete the extracted .osc on success.
#
# Files written in the script's directory: state.txt, seq_list.txt,
# <seq>.osc.gz / <seq>.osc (transient), and the logs/ directory.
#
# Exit status: 0 when nothing needs processing or every diff succeeded;
# non-zero on a setup error or when any worker failed.

set -euo pipefail

# Prints a message to stderr and aborts the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Downloads, extracts and filters a single replication diff.
# Globals:   REPLICATE_URL (read, from the environment)
# Arguments: $1 - sequence number (decimal, no leading zeros)
# Outputs:   progress line on stdout
# Returns:   0 on success; status of the first failing step otherwise.
#            The extracted .osc is only removed when filtering succeeded.
#######################################
process_seq() {
  local seqnum=$1
  local padded path url

  # Zero-pad to 9 digits and split into the AAA/BBB/CCC directory layout.
  # "${padded:6}" (not :6:3) keeps any digits beyond the ninth, matching
  # what the previous sed-based split produced.
  printf -v padded '%09d' "$seqnum"
  path="${padded:0:3}/${padded:3:3}/${padded:6}"
  url="${REPLICATE_URL}/${path}.osc.gz"

  echo "Downloading $url"
  # -f: treat HTTP errors as failures instead of saving the error page.
  # gunzip -f: overwrite a stale .osc left behind by an earlier failed run.
  curl -fsSL "$url" -o "${seqnum}.osc.gz" \
    && gunzip -f "${seqnum}.osc.gz" \
    && python filter_osc.py "$seqnum" \
    && rm -- "${seqnum}.osc"
}

main() {
  local exeDir lastSeqNum newSeqNum rc

  exeDir=$(cd "$(dirname -- "$0")" && pwd) \
    || die "cannot resolve script directory"
  cd "$exeDir" || die "cannot cd to $exeDir"

  REPLICATE_URL=$(grep -m1 -oP "REPLICATE_URL[',\s]*['\"]\Khttps?://[^'\"]+" config.php) \
    || die "REPLICATE_URL not found in config.php"
  echo "Using replicate URL: $REPLICATE_URL"
  # Exported for the child processes started below (php, xargs workers).
  export REPLICATE_URL

  lastSeqNum=$(php get_sync_state.php) || die "get_sync_state.php failed"
  lastSeqNum=${lastSeqNum//[[:space:]]/}
  [[ "$lastSeqNum" =~ ^[0-9]+$ ]] \
    || die "invalid last sequence number: '$lastSeqNum'"
  # Force base 10 so a value with leading zeros is not parsed as octal.
  lastSeqNum=$((10#$lastSeqNum))

  curl -fsSL "$REPLICATE_URL/state.txt" -o state.txt \
    || die "failed to download $REPLICATE_URL/state.txt"
  # Take the first sequenceNumber= line; drop a trailing CR if present.
  newSeqNum=$(awk -F= '$1 == "sequenceNumber" { sub(/\r$/, "", $2); print $2; exit }' state.txt)
  [[ "$newSeqNum" =~ ^[0-9]+$ ]] \
    || die "no valid sequenceNumber in state.txt"
  newSeqNum=$((10#$newSeqNum))
  printf '%s\n' "$newSeqNum"

  if ((newSeqNum == lastSeqNum)); then
    echo "No new file to be processed"
    exit 0
  fi
  if ((newSeqNum < lastSeqNum)); then
    # Nothing to download; the remote state is behind the local one.
    printf 'warning: remote sequence %s is older than local %s\n' \
      "$newSeqNum" "$lastSeqNum" >&2
    exit 0
  fi

  mkdir -p logs

  seq "$((lastSeqNum + 1))" "$newSeqNum" > seq_list.txt

  # The worker is an exported function so it can be quoted normally
  # instead of living inside a single-quoted bash -c string.
  export -f process_seq

  # -I{} already implies one input line per invocation; GNU xargs rejects
  # combining it with -n1, so only -I is used.
  rc=0
  xargs -P10 -I{} bash -c 'process_seq "$1"' _ {} < seq_list.txt || rc=$?
  if ((rc != 0)); then
    printf 'error: one or more sequence files failed to process\n' >&2
    exit "$rc"
  fi
}

main "$@"