Χρήστης:ArielGlenn/getembeddedin.sh
Μετάβαση στην πλοήγηση
Πήδηση στην αναζήτηση
#!/bin/bash
#
# getembeddedin.sh - list every page that transcludes (embeds) a given page
# on el.wiktionary.org, using the MediaWiki API (list=embeddedin).
#
# Usage:   getembeddedin.sh title
# Example: getembeddedin.sh 'Πρότυπο:ξεν-'
#
# The title is given in plain (not percent-encoded) form; spaces are turned
# into underscores and the value is URL-encoded before it is sent.
#
# Files written under ./embed_tmp (suffix is today's date, e.g. March-5-2024):
#   titles.<date>.xml.temp  the most recently fetched batch, one tag per line
#   titles.<date>.xml       all batches concatenated
#   titles.<date>.txt       one [[title]] wikilink per embedding page
#
# Exit status: 0 on success, 1 on a usage error or a failed download.

# Print the usage text and terminate the script with status 1.
usage() {
    {
        echo "Usage: $0 title"
        echo "where title is the title of the page for which to find transclusions"
        echo
        echo "For example:"
        echo "$0 'Πρότυπο:ξεν-'"
    } >&2
    exit 1
}

if [ -z "$1" ]; then
    usage
fi

# Wikimedia serves the API over HTTPS only; a plain http:// request is
# answered with a redirect, which curl -f (without -L) neither follows nor
# reports as an error.
api_url="https://el.wiktionary.org/w/api.php"
batch_size=500

# MediaWiki titles use underscores in place of spaces.
eititle=${1// /_}

tmp="./embed_tmp"
today=$(date +"%B-%e-%Y")
# %e pads single-digit days with a space; drop it so the file names never
# contain whitespace.
today=${today// /}
ext="$today"

mkdir -p "$tmp" || exit 1
titles="$tmp/titles.$ext"
eicontinue=""

# Start from a clean slate for today's run.
rm -f "$titles".*

count=1
while true; do
    echo "getting embedded titles $count to $(( count + batch_size - 1 ))"

    # Let curl URL-encode every parameter value, so titles and continuation
    # tokens containing &, #, +, % or | cannot corrupt the query string.
    query=(
        --data-urlencode "action=query"
        --data-urlencode "list=embeddedin"
        --data-urlencode "eititle=$eititle"
        --data-urlencode "eilimit=$batch_size"
        --data-urlencode "format=xml"
    )
    # Next batch: resume from where the previous response left off.
    if [ -n "$eicontinue" ]; then
        query+=( --data-urlencode "eicontinue=$eicontinue" )
    fi

    # Break the XML after every '>' so that each tag sits on its own line.
    # (Relies on sed treating \n in the replacement as a newline, as GNU sed
    # does.)
    curl --retry 10 -H 'Expect:' -f -G "${query[@]}" "$api_url" \
        | sed -e 's/>/>\n/g;' > "$titles.xml.temp"
    # Must be captured immediately: $? alone would only report sed's status.
    fetch_status=( "${PIPESTATUS[@]}" )

    if [ "${fetch_status[0]}" -ne 0 ]; then
        echo "Error ${fetch_status[0]} from curl, unable to get xml pages, bailing" >&2
        exit 1
    fi
    if [ "${fetch_status[1]:-1}" -ne 0 ]; then
        echo "Error ${fetch_status[1]:-1} writing $titles.xml.temp, bailing" >&2
        exit 1
    fi

    cat "$titles.xml.temp" >> "$titles.xml"

    # Get the continue parameter, e.g. from: <embeddedin eicontinue="500" />
    # Match the attribute itself so the result does not depend on attribute
    # order and cannot be confused with a page title containing the word.
    eicontinue=$(sed -n 's/.*[[:space:]]eicontinue="\([^"]*\)".*/\1/p' \
        "$titles.xml.temp" | head -n 1)
    if [ -z "$eicontinue" ]; then
        break
    fi

    # Be polite to the server between batches.
    sleep 6
    count=$(( count + batch_size ))
done

# Each result looks like: <ei pageid="1192" ns="0" title="θάλασσα" />
# Splitting on double quotes makes the title the 6th field.
awk -F'"' '/<ei page/ { print "[[" $6 "]]" }' "$titles.xml" > "$titles.txt"

# done!
echo "done!"
exit 0