Χρήστης:ArielGlenn/getsearch.sh
Μετάβαση στην πλοήγηση
Πήδηση στην αναζήτηση
#!/bin/bash
#
# getsearch.sh - collect the titles of all el.wiktionary.org pages whose text
# matches a search string, using the MediaWiki search API.
#
# Usage: getsearch.sh string
#
# Files written under ./search_tmp (suffixed with today's date):
#   titles.<date>.xml.temp  the most recently fetched batch, one tag per line
#   titles.<date>.xml       all fetched batches concatenated
#   titles.<date>.txt       one [[title]] wikilink per matching page
#
# Exit status: 0 on success, 1 on a usage error or a failed download.

# Print the usage message and exit with status 1.
usage() {
  echo "Usage: $0 string"
  echo "where string is the search string for which to retrieve titles"
  echo
  echo "For example:"
  echo "$0 'noentry'"
  exit 1
}

if [[ -z "$1" ]]; then
  usage
fi

# https rather than http: curl is not asked to follow redirects, so an
# http -> https redirect from the server would yield an empty result set.
api_url="https://el.wiktionary.org/w/api.php"
batch=500

# Spaces in the search string are turned into underscores.
search=${1// /_}

tmp="./search_tmp"
# %e pads single-digit days with a blank; strip it so the file names never
# contain whitespace.
today=$(date +"%B-%e-%Y")
ext=${today// /}
mkdir -p "$tmp" || exit 1
titles="$tmp/titles.$ext"

# Start from a clean slate: no continuation offset (even if one is inherited
# from the environment) and no leftovers from an earlier run today.
sroffset=""
rm -f -- "$titles".*

count=1
while true; do
  echo "getting search titles $count to $(( count + batch - 1 ))"

  # Next batch of results. The search string is URL-encoded by curl so that
  # characters such as '&', '#', '+' or non-ASCII text cannot corrupt the
  # query; sroffset is only sent once the API has handed one back.
  query=(
    -d "action=query"
    -d "list=search"
    --data-urlencode "srsearch=$search"
    -d "srprop=title"
    -d "srlimit=$batch"
    -d "format=xml"
    -d "srwhat=text"
  )
  if [[ -n "$sroffset" ]]; then
    query+=(-d "sroffset=$sroffset")
  fi

  # Put every XML tag on its own line so the line-oriented tools below can
  # pick individual elements apart.
  curl --retry 10 -H 'Expect:' -f -G "${query[@]}" "$api_url" \
    | sed -e 's/>/>\n/g;' > "$titles.xml.temp"
  # $? would report sed's status here; PIPESTATUS[0] is curl's.
  curl_status=${PIPESTATUS[0]}
  if [[ "$curl_status" -ne 0 ]]; then
    echo "Error $curl_status from curl, unable to get xml pages, bailing" >&2
    exit 1
  fi

  cat "$titles.xml.temp" >> "$titles.xml"

  # Continuation parameter, e.g. <search sroffset="500" />. No offset in the
  # response means the last batch has been fetched.
  sroffset=$(sed -n 's/.*sroffset="\([0-9][0-9]*\)".*/\1/p' "$titles.xml.temp" \
    | head -n 1)
  if [[ -z "$sroffset" ]]; then
    break
  fi

  # Pause between requests before asking for the next batch.
  sleep 6
  count=$(( count + batch ))
done

# Result elements look like <p ns="0" title="μερικοί" />; the title is the
# fourth double-quote-delimited field. Wrap each one as a [[wikilink]].
awk -F'"' '/<p ns/ { print "[[" $4 "]]" }' "$titles.xml" > "$titles.txt"

echo "done!"
exit 0