Χρήστης:AtouBot/getnamespace.sh
Μετάβαση στην πλοήγηση
Πήδηση στην αναζήτηση
#!/bin/bash
#
# getnamespace.sh - list every page title in one namespace of el.wiktionary.org.
#
# Usage: getnamespace.sh namespace
#   namespace  number of the namespace whose titles are retrieved
#              (0 = main, 1 = Talk, 2 = User, ...)
#
# Titles are requested from the MediaWiki API (action=query, list=allpages)
# in batches of $step. The raw XML of every batch is appended to
#   ./namespace_tmp/titles.<date>.xml
# and the final list, one "[[title]]" per line, is written to
#   ./namespace_tmp/titles.<date>.txt
#
# NOTE(review): the endpoint is plain http and the continuation logic expects
# an `apfrom="..."` attribute in the reply - confirm both against the API as
# currently deployed.

set -u

# Print the help text and terminate with status 1.
usage() {
  cat <<EOF
Usage: $0 namespace
where namespace is the number of the namespace from which to retrieve titles

For example:
$0 0 for the main namespace
$0 1 for the Talk namespace
$0 2 for the User namespace
$0 3 for the User talk namespace
$0 4 for the Βικιλεξικό namespace
$0 5 for the Βικιλεξικό talk namespace
$0 10 for the Template namespace
$0 11 for the Template talk namespace
EOF
  exit 1
}

if [[ -z "${1:-}" ]]; then
  usage
fi

namesp=${1// /_}                  # spaces in the argument become underscores
tmp="./namespace_tmp"
today=$(date +"%B-%d-%Y")
titles="$tmp/titles.$today"       # common prefix of all output files
page="$titles.xml.temp"           # XML of the batch currently being processed
api="http://el.wiktionary.org/w/api.php"
apfrom=""                         # URL-escaped title at which the next batch starts
step=500                          # titles requested per API call
count=1                           # ordinal of the first title in the current batch

if ! mkdir -p -- "$tmp"; then
  echo "Unable to create $tmp, bailing" >&2
  exit 1
fi

# Discard output left over from an earlier run on the same day.
rm -f -- "$titles".*

while true; do
  echo "getting namespace $namesp titles $count to $(( count + step - 1 ))"
  echo "$page"

  # The first request has no apfrom; later ones resume at the title the
  # previous reply told us to continue from.
  url="$api?action=query&list=allpages&apnamespace=$namesp"
  if [[ -n "$apfrom" ]]; then
    url+="&apfrom=$apfrom"
  fi
  url+="&aplimit=$step&format=xml"

  # Put every XML tag on its own line so the line-oriented awk calls below
  # can pick out single elements (relies on sed expanding \n in the
  # replacement, as GNU sed does).
  curl --retry 10 -H 'Expect:' -f "$url" | sed -e 's/>/>\n/g;' > "$page"
  # $? would only report sed; PIPESTATUS keeps the status of each stage.
  pipe_status=("${PIPESTATUS[@]}")
  if (( pipe_status[0] != 0 )); then
    echo "Error ${pipe_status[0]} from curl, unable to get xml pages, bailing" >&2
    exit 1
  fi
  if (( pipe_status[1] != 0 )); then
    echo "Error ${pipe_status[1]} from sed, unable to split xml pages, bailing" >&2
    exit 1
  fi

  cat -- "$page" >> "$titles.xml"

  # Continue parameter, format: <allpages apfrom="βήξιμο" />
  # The value is the second double-quote-delimited field of the first line
  # mentioning apfrom; no such line means this was the last batch.
  apfrom=$(awk -F'"' '/apfrom/ { print $2; exit }' "$page")
  if [[ -z "$apfrom" ]]; then
    break
  fi

  # Escape characters that would otherwise change the meaning of the query
  # string. '%' must be handled first so the escapes added afterwards are not
  # themselves re-escaped.
  apfrom=$(printf '%s\n' "$apfrom" \
    | sed -e 's/%/%25/g; s/ /%20/g; s/&/%26/g; s/#/%23/g; s/+/%2B/g;')

  sleep 6                         # pause between consecutive API requests
  count=$(( count + step ))
done

# Page entries, format: <p pageid="37881" ns="0" title="βέρος" />
# The title is the sixth double-quote-delimited field; wrap it as a wiki link.
awk -F'"' '/<p / { print "[[" $6 "]]" }' "$titles.xml" > "$titles.txt"

echo "done!"
exit 0