Χρήστης:Lou bot/tranInter2/script
tranInter_doubleTracker.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot goes over multiple pages of the home wiki and cleans up their
translation sections: translations whose entry exists on the corresponding
Wiktionary are linked with the {{ξεν}} template, the others are unlinked
with {{ξεν-}}, plain [[...]] links are converted to these templates, and
duplicated translations are removed.

Don't forget to set ftout to your current list of words; see below for a
line that looks like:
ftout = open('/home/cmillet/wikitruks/wiktio/all/2005-12-14.txt', 'r')

This script understands various command-line arguments:

-start:  used as -start:page_name, specifies that the robot should go
         alphabetically through all pages on the home wiki, starting at
         the named page.

-file:   used as -file:file_name, read a list of pages to treat from the
         named textfile. Page titles should be enclosed in
         [[double-squared brackets]].

-ref:    used as -ref:page_name, specifies that the robot should work on
         all pages referring to the named page.

-links:  used as -links:page_name, specifies that the robot should work
         on all pages linked from the named page.

-cat:    used as -cat:category_name, specifies that the robot should work
         on all pages in the named category.

All other parameters will be regarded as a page title; in this case, the
bot will only work on a single page.
"""
import wikipedia, wiktionary, pagegenerators, catlib
import sys
import re

tradMsg = u"{{-μτφ-}}"
commentCompiler = re.compile(u"\<\!\-\-(.*?)\-\-\>", re.DOTALL | re.MULTILINE)
# (optionally one ":"), one *, whitespace, one {{language code}}, then links, then a newline
translntLineCompiler = re.compile("(^:?\* *\{\{(\w*?)\}\}(.*?\n))", re.MULTILINE)
oldLinkCompiler = re.compile("\[\[(.*?)\]\]")
beforeTranslntCompiler = re.compile("(\ *,?\ *$)")
afterTranslntCompiler = re.compile("(^\ *,?\ *)")

'''
listelng = ['aa', 'ab', 'af', 'ak', 'als', 'am', 'an', 'ang', 'ar', 'as', 'ast',
            'av', 'ay', 'az', 'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo',
            'br', 'bs', 'ca', 'ch', 'chr', 'co', 'cr', 'cs', 'csb', 'cy', 'da',
            'de', 'dv', 'dz', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi',
            'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha',
            'he', 'hi', 'hr', 'hu', 'hy', 'ia', 'id', 'ie', 'ik', 'io', 'is',
            'it', 'iu', 'ja', 'jbo', 'jv', 'ka', 'kk', 'kl', 'km', 'kn', 'ko',
            'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li', 'ln', 'lo', 'lt', 'lv',
            'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mo', 'mr', 'ms', 'mt', 'my',
            'na', 'nah', 'nds', 'ne', 'nl', 'nn', 'no', 'oc', 'om', 'or', 'pa',
            'pi', 'pl', 'ps', 'pt', 'qu', 'rm', 'rn', 'ro', 'roa-rup', 'ru',
            'rw', 'sa', 'sc', 'scn', 'sd', 'sg', 'sh', 'si', 'simple', 'sk',
            'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw',
            'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tlh', 'tn', 'to',
            'tokipona', 'tpi', 'tr', 'ts', 'tt', 'tw', 'ug', 'uk', 'ur', 'uz',
            'vi', 'vo', 'wa', 'wo', 'xh', 'yi', 'yo', 'za', 'zh', 'zh-min-nan',
            'zu']
# removal of dog: there is no Wiktionary in that language
nowiktiolng = ['dog', 'fil', 'grc']
'''

# Wiktionaries that make the distinction between this and This:
# (the list http://meta.wikimedia.org/wiki/Help:Page_name is not really up to date)
nocaplng = ['aa', 'ab', 'af', 'ak', 'als', 'am', 'an', 'ang', 'ar', 'as', 'ast',
            'av', 'ay', 'az', 'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo',
            'br', 'bs', 'ca', 'ch', 'chr', 'co', 'cr', 'cs', 'csb', 'cy', 'da',
            'de', 'dv', 'dz', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi',
            'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha',
            'he', 'hi', 'hr', 'hu', 'hy', 'ia', 'id', 'ie', 'ik', 'io', 'is',
            'it', 'iu', 'ja', 'jbo', 'jv', 'ka', 'kk', 'kl', 'km', 'kn', 'ko',
            'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li', 'ln', 'lo', 'lt', 'lv',
            'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mo', 'mr', 'ms', 'mt', 'my',
            'na', 'nah', 'nds', 'ne', 'nl', 'nn', 'no', 'oc', 'om', 'or', 'pa',
            'pi', 'pl', 'ps', 'pt', 'qu', 'rm', 'rn', 'ro', 'roa-rup', 'ru',
            'rw', 'sa', 'sc', 'scn', 'sd', 'sg', 'sh', 'si', 'simple', 'sk',
            'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw',
            'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tlh', 'tn', 'to',
            'tokipona', 'tpi', 'tr', 'ts', 'tt', 'tw', 'ug', 'uk', 'ur', 'uz',
            'vi', 'vo', 'wa', 'wo', 'xh', 'yi', 'yo', 'za', 'zh', 'zh-min-nan',
            'zu']
# Wiktionaries I checked that still capitalize their entries:
# ln -- pt

# ftout MUST BE SET correctly
wordList = {}
ftout = open('./2007-12-24.txt', 'r')
line = ftout.readline()
while (line):
    language, translation = line.split(":", 1)
    if not wordList.has_key(language):
        wordList[language] = []
    wordList[language].append(translation)
    line = ftout.readline()
ftout.close()

# if it starts with {{-... or [[... we have entered another section, or the end of the list
# if ( re.compile("^(\{\{-|\[\[)",re.M).match(newtext,curIdx) ):


class TranslationBot:
    def __init__(self, generator, acceptall=False):
        self.generator = generator
        self.acceptall = acceptall

    def run(self):
        for page in self.generator:
            try:
                hasInterwikification = False
                hasDouble = False
                wikipedia.output('page: %s' % page.title())
                thePage = page.get()
                theChangedPage = thePage
                # oldText: as newText, but with the <!-- --> comments removed
                oldText = commentCompiler.sub(u"", thePage)
                newText = oldText
                curIdx = newText.find(tradMsg, 0)
                while (curIdx != -1):
                    curIdx = curIdx + len(tradMsg)
                    result = translntLineCompiler.search(oldText, curIdx)
                    while (result and (result.group(2) != "cf")):
                        completeLine = result.group(1)
                        lang = result.group(2)
                        analyzedPart = result.group(3)
                        newLine = completeLine
                        # linked translations whose entry is missing get unlinked
                        pattern = u'\{\{ξεν\|%s\|(.*?)\}\}' % lang
                        transList = re.findall(pattern, analyzedPart)
                        for translt in transList:
                            # we are unable to process the cases in which there is a #
                            if '#' in translt:
                                continue
                            wikipedia.output(u'recherche de "%s:%s"' % (lang, translt))
                            tosearch = u'%s\n' % translt
                            tosearch = tosearch.encode('utf-8')
                            if not (wordList.has_key(lang)) or not (tosearch in wordList[lang]):
                                hasInterwikification = True
                                print "DEWIKIFICATION"
                                new = u'{{ξεν-|%s|%s}}' % (lang, translt)
                                old = u'{{ξεν|%s|%s}}' % (lang, translt)
                                newLine = newLine.replace(old, new)
                        # unlinked translations whose entry now exists get linked
                        pattern = u'\{\{ξεν-\|%s\|(.*?)\}\}' % lang
                        transList = re.findall(pattern, analyzedPart)
                        for translt in transList:
                            if '#' in translt:
                                continue
                            wikipedia.output(u'recherche de "%s:%s"' % (lang, translt))
                            tosearch = u'%s\n' % translt
                            tosearch = tosearch.encode('utf-8')
                            if (wordList.has_key(lang)) and (tosearch in wordList[lang]):
                                hasInterwikification = True
                                print "INTERWIKIFICATION"
                                old = u'{{ξεν-|%s|%s}}' % (lang, translt)
                                new = u'{{ξεν|%s|%s}}' % (lang, translt)
                                newLine = newLine.replace(old, new)
                        # plain [[...]] links are converted to {{ξεν}} or {{ξεν-}}
                        transList = oldLinkCompiler.findall(analyzedPart)
                        for translt in transList:
                            hasInterwikification = True
                            wikipedia.output(u'recherche de "%s:%s"' % (lang, translt))
                            tosearch = u'%s\n' % translt
                            tosearch = tosearch.encode('utf-8')
                            if not ('#' in translt) and (wordList.has_key(lang)) and (tosearch in wordList[lang]):
                                print "INTERWIKIFICATION"
                                old = u'[[%s]]' % translt
                                new = u'{{ξεν|%s|%s}}' % (lang, translt)
                                newLine = newLine.replace(old, new)
                            else:
                                print "REDEWIKIFICATION"
                                old = u'[[%s]]' % translt
                                new = u'{{ξεν-|%s|%s}}' % (lang, translt)
                                newLine = newLine.replace(old, new)

                        intermediateLine = newLine
                        # Double tracking (for linked translations)
                        lineData = re.split(u'(\{\{ξεν\|%s\|.*?\}\})' % lang, intermediateLine)
                        lineMetaData = []
                        for i in range(len(lineData)):
                            if (re.match(u'(\{\{ξεν\|%s\|.*?\}\})' % lang, lineData[i])):
                                lineMetaData.append("isATranslation")
                                content = re.findall(u'\{\{ξεν\|%s\|(.*?)\}\}' % lang, lineData[i])
                                lineData[i] = content[0]
                            else:
                                lineMetaData.append("isNotATranslation")
                        checkedTranslation = {}
                        i = 0
                        while (i != len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                if not checkedTranslation.has_key(lineData[i]):
                                    checkedTranslation[lineData[i]] = i
                                    i = i + 1
                                else:
                                    hasDouble = True
                                    wikipedia.output(u'DOUBLON (%s)' % lineData[i])
                                    lineData.pop(i)
                                    lineMetaData.pop(i)
                                    if (i != len(lineData) - 1) and (lineMetaData[i] == "isNotATranslation"):
                                        lineData[i] = afterTranslntCompiler.sub("", lineData[i])
                                        if (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                            lineData[i - 1] = lineData[i - 1] + lineData[i]
                                            lineData.pop(i)
                                            lineMetaData.pop(i)
                                    elif (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                        lineData[i - 1] = beforeTranslntCompiler.sub("", lineData[i - 1])
                            else:
                                i = i + 1
                        newLine = ""
                        for i in range(len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                newLine = newLine + u'{{ξεν|%s|%s}}' % (lang, lineData[i])
                            else:
                                newLine = newLine + lineData[i]

                        intermediateLine = newLine
                        # Double tracking (for unlinked translations)
                        lineData = re.split(u'(\{\{ξεν-\|%s\|.*?\}\})' % lang, intermediateLine)
                        lineMetaData = []
                        for i in range(len(lineData)):
                            if (re.match(u'(\{\{ξεν-\|%s\|.*?\}\})' % lang, lineData[i])):
                                lineMetaData.append("isATranslation")
                                content = re.findall(u'\{\{ξεν-\|%s\|(.*?)\}\}' % lang, lineData[i])
                                lineData[i] = content[0]
                            else:
                                lineMetaData.append("isNotATranslation")
                        checkedTranslation = {}
                        i = 0
                        while (i != len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                if not checkedTranslation.has_key(lineData[i]):
                                    checkedTranslation[lineData[i]] = i
                                    i = i + 1
                                else:
                                    hasDouble = True
                                    wikipedia.output(u'DOUBLON (%s)' % lineData[i])
                                    lineData.pop(i)
                                    lineMetaData.pop(i)
                                    if (i != len(lineData) - 1) and (lineMetaData[i] == "isNotATranslation"):
                                        lineData[i] = afterTranslntCompiler.sub("", lineData[i])
                                        if (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                            lineData[i - 1] = lineData[i - 1] + lineData[i]
                                            lineData.pop(i)
                                            lineMetaData.pop(i)
                                    elif (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                        lineData[i - 1] = beforeTranslntCompiler.sub("", lineData[i - 1])
                            else:
                                i = i + 1
                        newLine = ""
                        for i in range(len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                newLine = newLine + u'{{ξεν-|%s|%s}}' % (lang, lineData[i])
                            else:
                                newLine = newLine + lineData[i]

                        # end of line analysis
                        newText = newText.replace(completeLine, newLine)
                        theChangedPage = theChangedPage.replace(completeLine, newLine)
                        curIdx = result.end(3)
                        result = translntLineCompiler.search(oldText, curIdx)
                    curIdx = newText.find(tradMsg, curIdx)
                    # end of the while over the translation section
                # end of the while over {{-μτφ-}}

                # we upload the text
                if (newText == oldText) or (not hasInterwikification and not hasDouble):
                    wikipedia.output('No changes were necessary in %s' % page.title())
                else:
                    if hasInterwikification and hasDouble:
                        wikipedia.output(u'interwikification και αφαίρεση των διπλών')
                        wikipedia.setAction(u'interwikification και αφαίρεση των διπλών μεταφράσεων (πρότυπο ξεν)')
                    elif hasInterwikification:
                        wikipedia.output(u'interwikification')
                        wikipedia.setAction(u'interwikification των μεταφράσεων (πρότυπο ξεν)')
                    else:
                        wikipedia.output(u'αφαίρεση των διπλών')
                        wikipedia.setAction(u'αφαίρεση των διπλών μεταφράσεων (πρότυπο ξεν)')
                    wikipedia.output(u'>>> %s <<<' % page.title())
                    wikipedia.showDiff(thePage, theChangedPage)
                    if not self.acceptall:
                        choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                        if choice in ['a', 'A']:
                            self.acceptall = True
                    if self.acceptall or choice in ['y', 'Y']:
                        print "put"
                        page.put(theChangedPage)
            except wikipedia.NoPage:
                print "Page %s does not exist?!?!" % page.aslink()
            except wikipedia.IsRedirectPage:
                pass
            except wikipedia.LockedPage:
                pass


def main():
    # page generator
    gen = None
    pageTitle = []
    for arg in wikipedia.handleArgs():
        if arg:
            if arg.startswith('-start:'):
                gen = pagegenerators.AllpagesPageGenerator(arg[7:])
            elif arg.startswith('-ref:'):
                referredPage = wikipedia.Page(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.ReferringPageGenerator(referredPage)
            elif arg.startswith('-links:'):
                linkingPage = wikipedia.Page(wikipedia.getSite(), arg[7:])
                gen = pagegenerators.LinkedPageGenerator(linkingPage)
            elif arg.startswith('-file:'):
                gen = pagegenerators.TextfilePageGenerator(arg[6:])
            elif arg.startswith('-cat:'):
                cat = catlib.Category(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            else:
                pageTitle.append(arg)
    if pageTitle:
        page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        wikipedia.showHelp('touch')
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = TranslationBot(preloadingGen)
        bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()
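The script parses the ftout word list with line.split(":", 1), so the file is expected to hold one language:word pair per line, UTF-8 encoded. A minimal sketch of preparing such a file and starting the bot; the sample entries and page names are hypothetical:

# -*- coding: utf-8 -*-
# Minimal sketch, assuming the dump format implied by line.split(":", 1):
# one "language:word" entry per line, UTF-8 encoded.
entries = [u'en:dog', u'en:cat', u'fr:chien']   # hypothetical sample entries
ft = open('./2007-12-24.txt', 'w')
for entry in entries:
    ft.write((entry + u'\n').encode('utf-8'))
ft.close()

# The bot is then started like any other pywikipedia script, for example:
#   python tranInter_doubleTracker.py σκύλος     (a single, hypothetical page)
#   python tranInter_doubleTracker.py -start:!   (all pages alphabetically)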
Test script
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot goes over multiple pages of the home wiki and cleans up their
translation sections: translations whose entry exists on the corresponding
Wiktionary are linked with the {{ξεν}} template, the others are unlinked
with {{ξεν-}}, plain [[...]] links are converted to these templates, and
duplicated translations are removed.

Don't forget to set ftout to your current list of words; see below for a
line that looks like:
ftout = open('/home/cmillet/wikitruks/wiktio/all/2005-12-14.txt', 'r')

This script understands various command-line arguments:

-start:  used as -start:page_name, specifies that the robot should go
         alphabetically through all pages on the home wiki, starting at
         the named page.

-file:   used as -file:file_name, read a list of pages to treat from the
         named textfile. Page titles should be enclosed in
         [[double-squared brackets]].

-ref:    used as -ref:page_name, specifies that the robot should work on
         all pages referring to the named page.

-links:  used as -links:page_name, specifies that the robot should work
         on all pages linked from the named page.

-cat:    used as -cat:category_name, specifies that the robot should work
         on all pages in the named category.

All other parameters will be regarded as a page title; in this case, the
bot will only work on a single page.
"""
import wikipedia, wiktionary, pagegenerators, catlib
import sys
import re

tradMsg = u"{{-μτφ-}}"
commentCompiler = re.compile(u"\<\!\-\-(.*?)\-\-\>", re.DOTALL | re.MULTILINE)
# (optionally one ":"), one *, whitespace, one {{language code}}, then links, then a newline
translntLineCompiler = re.compile("(^:?\* *\{\{(\w*?)\}\}(.*?\n))", re.MULTILINE)
oldLinkCompiler = re.compile("\[\[(.*?)\]\]")
beforeTranslntCompiler = re.compile("(\ *,?\ *$)")
afterTranslntCompiler = re.compile("(^\ *,?\ *)")

'''
listelng = ['aa', 'ab', 'af', 'ak', 'als', 'am', 'an', 'ang', 'ar', 'as', 'ast',
            'av', 'ay', 'az', 'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo',
            'br', 'bs', 'ca', 'ch', 'chr', 'co', 'cr', 'cs', 'csb', 'cy', 'da',
            'de', 'dv', 'dz', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi',
            'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha',
            'he', 'hi', 'hr', 'hu', 'hy', 'ia', 'id', 'ie', 'ik', 'io', 'is',
            'it', 'iu', 'ja', 'jbo', 'jv', 'ka', 'kk', 'kl', 'km', 'kn', 'ko',
            'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li', 'ln', 'lo', 'lt', 'lv',
            'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mo', 'mr', 'ms', 'mt', 'my',
            'na', 'nah', 'nds', 'ne', 'nl', 'nn', 'no', 'oc', 'om', 'or', 'pa',
            'pi', 'pl', 'ps', 'pt', 'qu', 'rm', 'rn', 'ro', 'roa-rup', 'ru',
            'rw', 'sa', 'sc', 'scn', 'sd', 'sg', 'sh', 'si', 'simple', 'sk',
            'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw',
            'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tlh', 'tn', 'to',
            'tokipona', 'tpi', 'tr', 'ts', 'tt', 'tw', 'ug', 'uk', 'ur', 'uz',
            'vi', 'vo', 'wa', 'wo', 'xh', 'yi', 'yo', 'za', 'zh', 'zh-min-nan',
            'zu']
# removal of dog: there is no Wiktionary in that language
nowiktiolng = ['dog', 'fil', 'grc']
'''

# Wiktionaries that make the distinction between this and This:
# (the list http://meta.wikimedia.org/wiki/Help:Page_name is not really up to date)
nocaplng = ['aa', 'ab', 'af', 'ak', 'als', 'am', 'an', 'ang', 'ar', 'as', 'ast',
            'av', 'ay', 'az', 'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo',
            'br', 'bs', 'ca', 'ch', 'chr', 'co', 'cr', 'cs', 'csb', 'cy', 'da',
            'de', 'dv', 'dz', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi',
            'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha',
            'he', 'hi', 'hr', 'hu', 'hy', 'ia', 'id', 'ie', 'ik', 'io', 'is',
            'it', 'iu', 'ja', 'jbo', 'jv', 'ka', 'kk', 'kl', 'km', 'kn', 'ko',
            'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li', 'ln', 'lo', 'lt', 'lv',
            'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mo', 'mr', 'ms', 'mt', 'my',
            'na', 'nah', 'nds', 'ne', 'nl', 'nn', 'no', 'oc', 'om', 'or', 'pa',
            'pi', 'pl', 'ps', 'pt', 'qu', 'rm', 'rn', 'ro', 'roa-rup', 'ru',
            'rw', 'sa', 'sc', 'scn', 'sd', 'sg', 'sh', 'si', 'simple', 'sk',
            'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw',
            'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tlh', 'tn', 'to',
            'tokipona', 'tpi', 'tr', 'ts', 'tt', 'tw', 'ug', 'uk', 'ur', 'uz',
            'vi', 'vo', 'wa', 'wo', 'xh', 'yi', 'yo', 'za', 'zh', 'zh-min-nan',
            'zu']
# Wiktionaries I checked that still capitalize their entries:
# ln -- pt

# ftout MUST BE SET correctly
wordList = {}
ftout = open('./2007-12-31.txt', 'r')
line = ftout.readline()
while (line):
    language, translation = line.split(":", 1)
    if not wordList.has_key(language):
        wordList[language] = []
    wordList[language].append(translation)
    line = ftout.readline()
ftout.close()

# if it starts with {{-... or [[... we have entered another section, or the end of the list
# if ( re.compile("^(\{\{-|\[\[)",re.M).match(newtext,curIdx) ):


class TranslationBot:
    def __init__(self, generator, acceptall=False):
        self.generator = generator
        self.acceptall = acceptall

    def run(self):
        for page in self.generator:
            try:
                hasInterwikification = False
                hasDouble = False
                wikipedia.output('page: %s' % page.title())
                thePage = page.get()
                theChangedPage = thePage
                # oldText: as newText, but with the <!-- --> comments removed
                oldText = commentCompiler.sub(u"", thePage)
                newText = oldText
                curIdx = newText.find(tradMsg, 0)
                while (curIdx != -1):
                    curIdx = curIdx + len(tradMsg)
                    result = translntLineCompiler.search(oldText, curIdx)
                    while (result and (result.group(2) != "cf")):
                        completeLine = result.group(1)
                        lang = result.group(2)
                        analyzedPart = result.group(3)
                        newLine = completeLine
                        # linked translations whose entry is missing get unlinked
                        pattern = u'\{\{ξεν\|%s\|(.*?)\}\}' % lang
                        transList = re.findall(pattern, analyzedPart)
                        for translt in transList:
                            # we are unable to process the cases in which there is a #
                            if '#' in translt:
                                continue
                            wikipedia.output(u'recherche de "%s:%s"' % (lang, translt))
                            tosearch = u'%s\n' % translt
                            tosearch = tosearch.encode('utf-8')
                            if not (wordList.has_key(lang)) or not (tosearch in wordList[lang]):
                                hasInterwikification = True
                                print "DEWIKIFICATION"
                                new = u'{{ξεν-|%s|%s}}' % (lang, translt)
                                old = u'{{ξεν|%s|%s}}' % (lang, translt)
                                newLine = newLine.replace(old, new)
                        # unlinked translations whose entry now exists get linked
                        pattern = u'\{\{ξεν-\|%s\|(.*?)\}\}' % lang
                        transList = re.findall(pattern, analyzedPart)
                        for translt in transList:
                            if '#' in translt:
                                continue
                            wikipedia.output(u'recherche de "%s:%s"' % (lang, translt))
                            tosearch = u'%s\n' % translt
                            tosearch = tosearch.encode('utf-8')
                            if (wordList.has_key(lang)) and (tosearch in wordList[lang]):
                                hasInterwikification = True
                                print "INTERWIKIFICATION"
                                old = u'{{ξεν-|%s|%s}}' % (lang, translt)
                                new = u'{{ξεν|%s|%s}}' % (lang, translt)
                                newLine = newLine.replace(old, new)
                        # conversion of plain [[...]] links is disabled in this test version:
                        # the candidates are only reported, not replaced
                        transList = oldLinkCompiler.findall(analyzedPart)
                        for translt in transList:
                            hasInterwikification = True
                            wikipedia.output(u'recherche de "%s:%s"' % (lang, translt))
                            tosearch = u'%s\n' % translt
                            tosearch = tosearch.encode('utf-8')
                            if not ('#' in translt) and (wordList.has_key(lang)) and (tosearch in wordList[lang]):
                                print "INTERWIKIFICATION"
                                old = u'[[%s]]' % translt
                                new = u'{{ξεν|%s|%s}}' % (lang, translt)
                                # newLine = newLine.replace(old, new)
                            else:
                                print "REDEWIKIFICATION"
                                old = u'[[%s]]' % translt
                                new = u'{{ξεν-|%s|%s}}' % (lang, translt)
                                # newLine = newLine.replace(old, new)

                        intermediateLine = newLine
                        # Double tracking (for linked translations)
                        lineData = re.split(u'(\{\{ξεν\|%s\|.*?\}\})' % lang, intermediateLine)
                        lineMetaData = []
                        for i in range(len(lineData)):
                            if (re.match(u'(\{\{ξεν\|%s\|.*?\}\})' % lang, lineData[i])):
                                lineMetaData.append("isATranslation")
                                content = re.findall(u'\{\{ξεν\|%s\|(.*?)\}\}' % lang, lineData[i])
                                lineData[i] = content[0]
                            else:
                                lineMetaData.append("isNotATranslation")
                        checkedTranslation = {}
                        i = 0
                        while (i != len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                if not checkedTranslation.has_key(lineData[i]):
                                    checkedTranslation[lineData[i]] = i
                                    i = i + 1
                                else:
                                    hasDouble = True
                                    wikipedia.output(u'DOUBLON (%s)' % lineData[i])
                                    lineData.pop(i)
                                    lineMetaData.pop(i)
                                    if (i != len(lineData) - 1) and (lineMetaData[i] == "isNotATranslation"):
                                        lineData[i] = afterTranslntCompiler.sub("", lineData[i])
                                        if (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                            lineData[i - 1] = lineData[i - 1] + lineData[i]
                                            lineData.pop(i)
                                            lineMetaData.pop(i)
                                    elif (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                        lineData[i - 1] = beforeTranslntCompiler.sub("", lineData[i - 1])
                            else:
                                i = i + 1
                        newLine = ""
                        for i in range(len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                newLine = newLine + u'{{ξεν|%s|%s}}' % (lang, lineData[i])
                            else:
                                newLine = newLine + lineData[i]

                        intermediateLine = newLine
                        # Double tracking (for unlinked translations)
                        lineData = re.split(u'(\{\{ξεν-\|%s\|.*?\}\})' % lang, intermediateLine)
                        lineMetaData = []
                        for i in range(len(lineData)):
                            if (re.match(u'(\{\{ξεν-\|%s\|.*?\}\})' % lang, lineData[i])):
                                lineMetaData.append("isATranslation")
                                content = re.findall(u'\{\{ξεν-\|%s\|(.*?)\}\}' % lang, lineData[i])
                                lineData[i] = content[0]
                            else:
                                lineMetaData.append("isNotATranslation")
                        checkedTranslation = {}
                        i = 0
                        while (i != len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                if not checkedTranslation.has_key(lineData[i]):
                                    checkedTranslation[lineData[i]] = i
                                    i = i + 1
                                else:
                                    hasDouble = True
                                    wikipedia.output(u'DOUBLON (%s)' % lineData[i])
                                    lineData.pop(i)
                                    lineMetaData.pop(i)
                                    if (i != len(lineData) - 1) and (lineMetaData[i] == "isNotATranslation"):
                                        lineData[i] = afterTranslntCompiler.sub("", lineData[i])
                                        if (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                            lineData[i - 1] = lineData[i - 1] + lineData[i]
                                            lineData.pop(i)
                                            lineMetaData.pop(i)
                                    elif (i != 0) and (lineMetaData[i - 1] == "isNotATranslation"):
                                        lineData[i - 1] = beforeTranslntCompiler.sub("", lineData[i - 1])
                            else:
                                i = i + 1
                        newLine = ""
                        for i in range(len(lineData)):
                            if lineMetaData[i] == "isATranslation":
                                newLine = newLine + u'{{ξεν-|%s|%s}}' % (lang, lineData[i])
                            else:
                                newLine = newLine + lineData[i]

                        # end of line analysis
                        newText = newText.replace(completeLine, newLine)
                        theChangedPage = theChangedPage.replace(completeLine, newLine)
                        curIdx = result.end(3)
                        result = translntLineCompiler.search(oldText, curIdx)
                    curIdx = newText.find(tradMsg, curIdx)
                    # end of the while over the translation section
                # end of the while over {{-μτφ-}}

                # we upload the text
                if (newText == oldText) or (not hasInterwikification and not hasDouble):
                    wikipedia.output('No changes were necessary in %s' % page.title())
                else:
                    if hasInterwikification and hasDouble:
                        wikipedia.output(u'interwikification και αφαίρεση των διπλών')
                        wikipedia.setAction(u'interwikification και αφαίρεση των διπλών μεταφράσεων (πρότυπο ξεν)')
                    elif hasInterwikification:
                        wikipedia.output(u'interwikification')
                        wikipedia.setAction(u'interwikification των μεταφράσεων (πρότυπα ξεν, ξεν-)')
                    else:
                        wikipedia.output(u'αφαίρεση των διπλών')
                        wikipedia.setAction(u'αφαίρεση των διπλών μεταφράσεων (πρότυπα ξεν, ξεν-)')
                    wikipedia.output(u'>>> %s <<<' % page.title())
                    wikipedia.showDiff(thePage, theChangedPage)
                    if not self.acceptall:
                        choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                        if choice in ['a', 'A']:
                            self.acceptall = True
                    if self.acceptall or choice in ['y', 'Y']:
                        print "put"
                        page.put(theChangedPage)
            except wikipedia.NoPage:
                print "Page %s does not exist?!?!" % page.aslink()
            except wikipedia.IsRedirectPage:
                pass
            except wikipedia.LockedPage:
                pass


def main():
    # page generator
    gen = None
    pageTitle = []
    for arg in wikipedia.handleArgs():
        if arg:
            if arg.startswith('-start:'):
                gen = pagegenerators.AllpagesPageGenerator(arg[7:])
            elif arg.startswith('-ref:'):
                referredPage = wikipedia.Page(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.ReferringPageGenerator(referredPage)
            elif arg.startswith('-links:'):
                linkingPage = wikipedia.Page(wikipedia.getSite(), arg[7:])
                gen = pagegenerators.LinkedPageGenerator(linkingPage)
            elif arg.startswith('-file:'):
                gen = pagegenerators.TextfilePageGenerator(arg[6:])
            elif arg.startswith('-cat:'):
                cat = catlib.Category(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            else:
                pageTitle.append(arg)
    if pageTitle:
        page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        wikipedia.showHelp('touch')
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = TranslationBot(preloadingGen)
        bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()
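Both versions key every transformation off translntLineCompiler, which splits a translation line into its language code and the part that is scanned for templates and links. A small standalone sketch of that match follows; the wikitext line is a hypothetical example:

# -*- coding: utf-8 -*-
import re

# Same pattern as in the scripts above: (optionally one ":"), one "*",
# whitespace, one {{language code}} template, then the rest of the line.
translntLineCompiler = re.compile("(^:?\* *\{\{(\w*?)\}\}(.*?\n))", re.MULTILINE)

# Hypothetical line from a {{-μτφ-}} (translations) section:
sample = u'* {{en}} {{ξεν|en|dog}}, {{ξεν|en|dog}}, [[hound]]\n'
result = translntLineCompiler.search(sample)
print result.group(2)   # the language code: en
print result.group(3)   # the part scanned for {{ξεν}}, {{ξεν-}} and [[...]] links

# On this sample the double-tracking pass would drop the repeated
# {{ξεν|en|dog}} together with the comma that separates it from its neighbour.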