# Source page: Χρήστης:Vanished user Xorisdtbdfgonugyfs/wikitools/basic.py

#!/usr/bin/env python3

import os, re, sys
import unicodedata

# Directory that contains this module; the Languages.lua and PartOfSpeech.lua
# files used by get_el_basics / save_el_basics live next to it.
workingpath = os.path.dirname(os.path.realpath(__file__))

# All patterns below are raw strings so that regex escapes such as \s, \{ and
# \| reach the regex engine untouched instead of being (invalid) Python string
# escapes.

# sectionsre: a section heading line.
#   AGROUP: starts with some number of equals signs (=)
#   BGROUP: has some text
#   (?P=AGROUP): ends with the same number of equals signs
#   CGROUP: and possibly trailing junk text
sectionsre = r"(?P<AGROUP>=+)\s*?(?P<BGROUP>.+)\s*?(?P=AGROUP)(?P<CGROUP>.*)$"
# sectionTFULLre: a heading title that is a single template, either
# {{TEMPNAME|PARAMS}} or {{TEMPNAMENOPARAMS}}.
sectionTFULLre = r"(?P<TEMPLSTART>\{\{)(?P<TEMPLFULL>((?P<TEMPNAME>.*?)\|{1}(?P<PARAMS>.*))|(?P<TEMPNAMENOPARAMS>.*?))(?P<TEMPLEND>\}\})$"

# A separator line: four dashes at the start of a line (needs re.MULTILINE).
sectionsepsregex = r'(^\-\-\-\-\s*?\n)'

# A whole translations table, from the {{μτφ-αρχή...}} line up to and
# including the {{μτφ-τέλος}} line (needs re.DOTALL so THEBODY spans lines).
translationsre = r"(?P<ΤΗΕTABLE>(?P<STARTLINE>\{\{μτφ-αρχή.*?\}\}{1}\s*?\n)(?P<THEBODY>.*?)(?P<ENDLINE>\{\{μτφ-τέλος\}\}\s*?\n){1})"

# A translation line "* {{iso}} : {{τ|iso|..." (optionally commented out with
# "<!--"); the second pattern only requires the leading "{{iso}} :" part.
translinere = r'^([*]\s*?|<!--\s*?\*\s*?)\{\{(?P<LANGISO>.*?)\}\}\s*?:\s*?\{\{τ\|(?P=LANGISO)\|.*'
translinerewithlang = r'^([*]\s*?|<!--\s*?\*\s*?)\{\{(?P<LANGISO>.*?)\}\}\s*?:.*'

# Per-section problem messages (stored in AnySubSection.garbages).
BAD_PART_NO_LANG = 'Μέρος του λόγου χωρίς παράμετρο γλώσσας.'
BAD_PART_WRONG_LANG = 'Μέρος του λόγου με λάθος παράμετρο γλώσσας.'
BAD_SECTION_2 = 'Λάθος τίτλος ενότητας βάθους 2.'
BAD_SECTION_DEPTH = 'Λάθος βάθος ενότητας.'
BAD_SECTION_TITLE = 'Λάθος στον τίτλο της ενότητας.'
BAD_SECTION = 'Ακαθόριστη ενότητα.'

# Page-level error messages (stored in SectionedTitle.errors).
ERROR_BAD_SEPARATORS = 'Περιέχει στοιχεία ανάμεσα σε διαχωριστικό και την πρώτη υποενότητα!'
ERROR_LANG_SECTION = 'Σφάλμα στη γλώσσα!'
ERROR_LANG_TWICE = 'Τουλάχιστον μία γλώσσα έχει διπλή ενότητα!'
ERROR_NO_LANG_SUBSECTIONS = 'Δεν υπάρχουν υποενότητες σε κάποια γλώσσα!'
ERROR_NOT_A_LEMMA = 'Δεν είναι λήμμα!'

# Text placed between two consecutive language sections when a page is rebuilt.
LANG_SEPARATOR = '\n\n\n----\n\n'

class SectionedTitle:
    """Result of splitting one wiki page into sections.

    Every instance starts empty; Get_sectioned_title fills it in.
    """

    def __init__(self):
        # Page title.
        self.title = ''
        # Multi-line comments taken out of the text and the markers that
        # stand in for them (parallel lists).
        self.markerforcomments, self.removedcomments = [], []
        # Category-like links removed from the end of the text.
        self.endcategories = []
        # The line containing "{{κλείδα-ελλ" and the index of the section
        # it was found in (-1 while none has been seen).
        self.kleidaline, self.kleidalinesection = '', -1
        # Page-level error messages, and a list named "fixed" that the
        # visible code never fills.
        self.errors, self.fixed = [], []
        # "langs" is not filled by the visible code; "sections" holds the
        # AnySubSection objects in page order.
        self.langs, self.sections = [], []

class AnySubSection:
    """One heading of a page together with the text that follows it.

    Every instance starts with neutral defaults; Get_sectioned_title sets
    the fields while it walks the page line by line.
    """

    def __init__(self):
        # Number of "=" in the heading (-1 until set) and the heading line.
        self.depth, self.originalline = -1, ''
        # iso of the language section seen before this one / of this one.
        self.previouslangiso = self.langiso = None
        # Sort key of the language, used to order language sections.
        self.langtitlekey = ''
        # True when the heading template is a part of speech.
        self.ispartofspeech = False
        # Problems found in the heading (messages or trailing junk text).
        self.garbages = []
        # True when the heading title is flagged as needing a fix.
        self.fixsectiontitle = False
        # Template name used as heading title, and its parameters.
        self.titletemplate = self.titletemplateparams = ''
        # Text lines that follow the heading.
        self.content = ''
        # Headword that must exist in subsections that refer to the language.
        self.headword = ''
        # Not filled by the visible code.
        self.changes = []

def Get_lang_subsections(sectioned):
    """Group the subsections of a page under their depth-2 parent sections.

    The first entry of ``sectioned.sections`` (the text before any heading)
    is skipped. Every depth-2 section opens a new group; every other section
    is appended to the group opened last.

    Returns the tuple ``(sectioned, langs)``. ``langs`` maps a running index
    (starting at 0, in page order, not sorted) to a dict with the keys:
        'subsections': list of sections, the depth-2 one first
        'iso': language iso of the depth-2 section, '' when it has none
        'is_lang_section': True when the depth-2 section has a language iso

    Problems are appended to ``sectioned.errors``:
        ERROR_LANG_TWICE: a language iso opens more than one section
        ERROR_LANG_SECTION: a subsection appears before any depth-2 section
            (processing stops at that point)
        ERROR_NO_LANG_SUBSECTIONS: a language section has no subsections
    """
    langs = {}
    seenisos = set()
    current = None
    for subsection in sectioned.sections[1:]:
        if subsection.depth == 2:
            # A language section, or the closing references section.
            current = len(langs)
            islang = subsection.langiso is not None
            langs[current] = {
                'subsections': [subsection],
                'iso': subsection.langiso if islang else '',
                'is_lang_section': islang,
            }
            if islang:
                if subsection.langiso in seenisos:
                    if ERROR_LANG_TWICE not in sectioned.errors:
                        sectioned.errors.append(ERROR_LANG_TWICE)
                else:
                    seenisos.add(subsection.langiso)
        elif current is not None:
            # Belongs to the depth-2 section opened last.
            langs[current]['subsections'].append(subsection)
        else:
            # The page starts with a subsection that has no depth-2 parent.
            # The original code named an undefined constant here
            # (ERROR_NO_LANG_SECTION) and raised NameError; the existing
            # language-error message is used instead.
            sectioned.errors.append(ERROR_LANG_SECTION)
            return sectioned, langs

    for entry in langs.values():
        if entry['is_lang_section'] and len(entry['subsections']) < 2:
            if ERROR_NO_LANG_SUBSECTIONS not in sectioned.errors:
                sectioned.errors.append(ERROR_NO_LANG_SUBSECTIONS)
    return sectioned, langs

def Get_sectioned_title(pagetitle, wikitext, languages, parts):
    """Split the wikitext of a page into a SectionedTitle.

    Only main-namespace (ns=0), non-redirect pages must be sent here; no
    check for that is done and errors will arise otherwise.

    Parameters:
        pagetitle: title of the page; stored in the result's ``title``.
        wikitext: the text of the page.
        languages: mapping keyed by language iso; the 'haswiktionary' and
            'sortforlink' keys of an entry are read here.
        parts: container of part-of-speech template names (only membership
            is tested).

    Returns a SectionedTitle. When the page is rejected early (portal page,
    soft redirect, bad separators) only its ``errors`` list is filled in.
    Otherwise ``sections[0]`` holds the text before the first heading
    (depth 0) and every following entry is one heading with the text below
    it. Problems found in a heading are recorded in that section's
    ``garbages``; an unknown language iso also adds ERROR_LANG_SECTION to
    the result's ``errors``.
    """
    sectioned = SectionedTitle()
    #Do not create sections for these pages in elwiktionary
    if pagetitle.startswith('Πύλη:'):
        sectioned.errors.append(ERROR_NOT_A_LEMMA)
        return sectioned
    if wikitext.strip().startswith('{{softredirect'):
        sectioned.errors.append(ERROR_NOT_A_LEMMA)
        return sectioned
    # Reject pages where content has been placed in a section it does not
    # belong to. For example in the page "λάμα" the images are (were?) placed
    # above the reference section (above etymology sections 2 and 3).
    if has_bad_section_separators(wikitext):
        sectioned.errors.append(ERROR_BAD_SEPARATORS)
        return sectioned
    
    #Now proceed...
    sectioned = SectionedTitle()
    sectioned.title = pagetitle
    #removes and keeps the comments that span more than one line
    wikitext, sectioned.markerforcomments, sectioned.removedcomments = remove_noninline_comments(wikitext)
    #remove and keep categories at end
    wikitext, sectioned.endcategories = remove_category_like_from_end(wikitext)
    lastlangiso = ''
    # keepends=True so that joining the pieces back reproduces the text
    splittedlines = wikitext.splitlines(True)
    # section 0: everything before the first heading
    thissection = AnySubSection()
    thissection.depth = 0
    # plain (non-template) depth-2 titles that are flagged for fixing
    sectionstitlestofix = ['Σημειώσεις', 'Αναφορές', 'Πηγές', 'παραπομπές']
    for line in splittedlines:
        sectionsrematch = re.match(sectionsre,line)
        if sectionsrematch: # a new subsection starts
            # add the previous one to the sections of the page
            sectioned.sections.append(thissection)
            # and start a new section
            thissection = AnySubSection()
            agroup = sectionsrematch.groupdict()['AGROUP'] # the = signs at the start and at the end
            bgroup = sectionsrematch.groupdict()['BGROUP'] # the content between the = signs
            cgroup = sectionsrematch.groupdict()['CGROUP'] # content after the = signs; may hold extra = or other junk
            thissection.depth = len(agroup) # number of = signs
            thissection.originalline = line
            sectiontitle = bgroup.strip()
            if len(cgroup)>0:
                thissection.garbages.append(cgroup)
            if thissection.depth == 2: # possibly a language section
                if sectiontitle in sectionstitlestofix:
                    thissection.fixsectiontitle = True
                else:
                    # check whether it has the form {{-XX-}}, i.e. a language section
                    if sectiontitle.startswith('{{-') and sectiontitle.endswith('-}}') and len(sectiontitle) > 7:
                        possiblelang = sectiontitle[3:-3] # in theory the iso
                        # if such a language exists
                        if possiblelang in languages:
                            thissection.langiso = possiblelang
                            lastlangiso = thissection.langiso
                            # create the headword
                            if thissection.langiso in ['el', 'gkm', 'grc' ] or (not languages[thissection.langiso]['haswiktionary']):
                                thissection.headword = "'''{{PAGENAME}}'''"
                            else:
                                #TODO: a different one for languages such as sr-la
                                # so that it does not trip the checks.
                                thissection.headword = '{{τ|' + thissection.langiso + '|{{PAGENAME}}}}'
                            thissection.langtitlekey = languages[thissection.langiso]['sortforlink']
                        else:
                            # the iso does not exist
                            thissection.garbages.append(BAD_SECTION_2 )
                            sectioned.errors.append(ERROR_LANG_SECTION)

                    else: # some other depth-2 section,
                        # only the {{αναφορές}} section for the time being
                        if sectiontitle in ['{{αναφορές}}']:
                            pass
                        else:
                            # NOTE(review): debugging output on stdout
                            print('sectiontitle',sectiontitle)
                            thissection.garbages.append(BAD_SECTION_2 )
            else:
                if thissection.depth > 2: # it is a section,
                    # and one deeper than 2, therefore
                    # inside some subsection of a language,
                    # whether that language is right or wrong
                    thissection.previouslangiso = lastlangiso
                    thematch = re.match(sectionTFULLre, sectiontitle)
                    if thematch: # the title is a template
                        if thematch.groupdict()['TEMPNAMENOPARAMS']:
                            thissection.titletemplate = thematch.groupdict()['TEMPNAMENOPARAMS'].strip()
                            thissection.ispartofspeech = (thissection.titletemplate in parts)
                        elif thematch.groupdict()['TEMPNAME']:
                            thissection.titletemplate = thematch.groupdict()['TEMPNAME'].strip()
                            thissection.ispartofspeech = (thissection.titletemplate in parts)
                            thissection.titletemplateparams = thematch.groupdict()['PARAMS'].strip()
                        else:
                            # undefined section:
                            # it has nothing for titletemplate
                            #thissection.ispartofspeech = False
                            thissection.garbages.append(BAD_SECTION)
                        if thissection.titletemplate in parts:
                            # a part of speech, so it must carry a language parameter
                            if thissection.titletemplateparams.strip() == '':
                                thissection.garbages.append(BAD_PART_NO_LANG)
                            else:
                                # the first parameter must be the iso of the enclosing language
                                paramssplitted = thissection.titletemplateparams.split("|")
                                if paramssplitted[0].strip() != lastlangiso:
                                    thissection.garbages.append(BAD_PART_WRONG_LANG)
                    else: # the title is not a template
                        thissection.garbages.append(BAD_SECTION_TITLE)
                else: # a section of depth less than 2
                    thissection.garbages.append(BAD_SECTION_DEPTH)
        else:
            # an ordinary line: it belongs to the section currently open
            thissection.content += line
            if "{{κλείδα-ελλ" in line:
                sectioned.kleidaline = line
                # the open section is not appended yet, so this is its future index
                sectioned.kleidalinesection = len(sectioned.sections)

    # append the section that was still open when the text ended
    sectioned.sections.append(thissection)
    #check_some(sectioned, languages)
    return sectioned

def add_keys_to_languages(thelanguagesdict):
    """Add the two sort keys to every entry of the languages dictionary.

    'sortforlink' is the sort key of the entry's 'link' and orders the
    language sections inside a page; 'sortforname' is the sort key of the
    entry's 'name' and orders by language name (e.g. for translations).
    The dictionary is changed in place; nothing is returned.
    """
    # Digit prefixes that pin these languages to the top of a page:
    # διεθ first, then el, then gkm (because of its period), grc last.
    pinned = {'διεθ': '0', 'el': '1', 'gkm': '2', 'grc': '3'}
    for iso, entry in thelanguagesdict.items():
        entry['sortforlink'] = pinned.get(iso, '') + el_sort_key(entry['link'])
        entry['sortforname'] = el_sort_key(entry['name'])

def el_decompose(char):
    """Return the plain Greek lowercase base letter of ``char``, or ''.

    The Unicode decomposition of the character is followed, always keeping
    its first code point, until a character with no decomposition is
    reached. If that character is one of the letters α..ω it is returned,
    with final sigma (ς) folded into σ; anything else gives ''.
    Uppercase letters give '' as well, so callers lowercase first.
    """
    base = char
    while True:
        fields = unicodedata.decomposition(base).split()
        # Compatibility decompositions start with a tag such as "<compat>";
        # it is not a code point and must be skipped.
        if fields and fields[0].startswith('<'):
            fields = fields[1:]
        if not fields:
            break
        # Singleton decompositions (one code point) are followed as well,
        # e.g. U+1F71 -> U+03AC -> U+03B1.
        base = chr(int(fields[0], 16))
    code = ord(base)
    if code == 962:
        # final sigma (962) sorts as sigma (963)
        code = 963
    # 945..969 is the range α..ω
    return chr(code) if 944 < code < 970 else ''

def el_sort_key(thestring):
    thekey = ''
    for achar in thestring.lower():
        thekey += el_decompose(achar)
    return thekey

def fix_translation_section(thetext, languages):
    """Return the text of a translations section with every table sorted.

    Every table that has a proper start line and end line is rebuilt: its
    recognised translation lines are sorted with sort_el_transl_table and
    the column separator ``{{μτφ-μέση}}`` is placed after the first half of
    them (the first column gets the extra line when the count is odd).
    Lines that could not be recognised are moved below the table, after a
    ``{{μτφ-κατάταξη}}`` line. Text outside the tables is left untouched.

    Parameters:
        thetext: the content of the section.
        languages: the languages dictionary, passed to sort_el_transl_table.
    """
    middleline = '{{μτφ-μέση}}\n'
    pieces = []
    cursor = 0
    for amatch in re.finditer(translationsre, thetext, flags=re.DOTALL):
        thesorted, badlines = sort_el_transl_table(amatch.group('THEBODY'), languages)
        rows = [aline + '\n' for _sortkey, aline in thesorted]
        if rows:
            # The original inserted the separator *before* row n // 2, which
            # made it the first row for 2-3 entries and dropped it for one.
            rows.insert((len(rows) + 1) // 2, middleline)
        newtable = amatch.group('STARTLINE') + ''.join(rows) + amatch.group('ENDLINE')
        if badlines:
            newtable += '{{μτφ-κατάταξη}}' + '\n'
            newtable += ''.join(aline + '\n' for aline in badlines)
        # Rebuild by match position instead of str.replace, so that only this
        # occurrence of the table is rewritten.
        pieces.append(thetext[cursor:amatch.start()])
        pieces.append(newtable)
        cursor = amatch.end()
    pieces.append(thetext[cursor:])
    return ''.join(pieces)

def get_a_marker(thetext):
    """Return a single-quoted marker that does not occur in ``thetext``.

    Starts from '@#@' and wraps it in one more pair of '@' for as long as
    the quoted marker is found in the text.
    """
    core = '@#@'
    quoted = "'{}'".format(core)
    while quoted in thetext:
        core = '@{}@'.format(core)
        quoted = "'{}'".format(core)
    return quoted

def get_el_basics():
    """Load the languages and the parts of speech from the saved Lua files.

    Reads Languages.lua and PartOfSpeech.lua from the module directory,
    parses them, adds the sort keys to the languages and returns the tuple
    ``(languages, parts)``.
    """
    luatexts = []
    for filename in ("Languages.lua", "PartOfSpeech.lua"):
        with open(os.path.join(workingpath, filename), 'rt', encoding='utf_8') as luafile:
            luatexts.append(luafile.read())
    languagestext, partstext = luatexts
    languages = get_languages_from_string(languagestext)
    add_keys_to_languages(languages)
    return languages, get_parts_from_string(partstext)

def get_languages_from_string(theLUAstring):
    """Read the languages from the text of Module:Languages.

    Needs the current text of the module, whole and as it is.

    Returns a dictionary keyed by the iso of each language. Every value is
    a dictionary with the string keys name, cat, link, from, frm, wiki,
    words and image ('' when the module entry has no such key) plus the
    boolean key haswiktionary (False when the entry has no wikiExists).
    """
    stringitems = ('name', 'cat', 'link', 'from', 'frm', 'wiki', 'words', 'image')
    # One "Languages['iso'] = { ... }" entry; INSIDE is the text in the braces.
    langre = re.compile(
        r"Languages\['(?P<LANGISO>.*?)'\]\s*?=\s*?\{{1}(?P<INSIDE>.*?)\}{1}\s*?",
        re.DOTALL)
    # key = 'value' or key = "value". The leading \b keeps a key such as
    # "name" from matching inside a longer key.
    namere = r'''\b{0!s}\s*?=+?\s*?(?P<START1>['|"])(?P<THEVALUE>.*?)(?P=START1)\s*?[,|\s|\b]*?'''
    itemres = {item: re.compile(namere.format(item), re.DOTALL) for item in stringitems}
    # wikiExists holds a bare true/false, not a quoted value.
    wiktExistsre = re.compile(
        r'wikiExists\s*?=+?\s*?(?P<THEVALUE>true|false)\s*?[,|\s|\b]*?',
        re.DOTALL)

    languages = {}
    for amatch in langre.finditer(theLUAstring):
        inside = amatch.group('INSIDE')
        entry = {}
        for item, itemre in itemres.items():
            namematch = itemre.search(inside)
            entry[item] = namematch.group('THEVALUE') if namematch else ''
        existsmatch = wiktExistsre.search(inside)
        entry['haswiktionary'] = bool(existsmatch) and existsmatch.group('THEVALUE') == 'true'
        languages[amatch.group('LANGISO')] = entry
    return languages

def get_parts_from_string(theLUAstring):
    """Read the parts of speech from the text of Module:PartOfSpeech.

    Needs the current text of the module, whole and as it is. Only lines
    that start with a ``pos['...'] = { ['link'] = '...', ['κατηγορία']``
    entry are used.

    Returns a dictionary keyed by the part of speech; every value is a
    dictionary that currently holds only the key 'link'. It is a dictionary
    so that more keys can be added later.
    """
    entryre = re.compile(
        r"pos\['(?P<PART>.*)']\s*=\s*\{\s*\['link']\s*=\s*'(?P<PARTLINK>.+)'\s*,\s*\['κατηγορία']")
    parts = {}
    for hit in filter(None, map(entryre.match, theLUAstring.splitlines())):
        parts[hit.group('PART')] = {'link': hit.group('PARTLINK')}
    return parts

def has_bad_section_separators(thetext):
    """Tell whether something other than a heading follows a separator line.

    The text is split on the separator lines. True is returned when any
    piece after the first one is neither empty, nor a separator itself,
    nor text that starts with "==".
    """
    pieces = re.split(sectionsepsregex, thetext, flags=re.DOTALL | re.MULTILINE)
    for piece in pieces[1:]:
        trimmed = piece.strip()
        if trimmed in ('', '----'):
            continue
        if not trimmed.startswith("=="):
            return True  # found a bad start
    return False

def recreate_with_sorted_langs(sectioned, langs, languages, parts):
    """Rebuild the page text with the language sections sorted.

    The text before the first heading comes first, then the language
    sections ordered by the ``langtitlekey`` of their depth-2 section, then
    the groups without a language iso in their original order. Multi-line
    comments are put back in place of their markers and the removed
    categories are appended at the end. ``languages`` and ``parts`` are not
    used.

    WARNING: intrusive; the content of the last subsection of every language
    is changed in place (separators removed, trailing newlines normalised).
    """
    pieces = [sectioned.sections[0].content.strip() + '\n']
    langkeys = [key for key in langs if langs[key]['iso']]
    otherkeys = [key for key in langs if key not in langkeys]
    # a separator line together with the newlines around it
    separatorre = '(\n*\\-\\-\\-\\-\\s*?\n*)'
    # alphabetical order of the language sections
    orderedkeys = sorted(langkeys, key=lambda key: langs[key]['subsections'][0].langtitlekey)
    for position, key in enumerate(orderedkeys, start=1):
        subsections = langs[key]['subsections']
        closing = subsections[-1]
        # drop any separator from the last subsection of this language
        cleaned = re.sub(separatorre, '', closing.content, flags=re.DOTALL | re.MULTILINE).rstrip()
        if position < len(langkeys):
            closing.content = cleaned + LANG_SEPARATOR
        else:
            closing.content = cleaned.rstrip() + '\n\n'
        # the heading line and the content of every subsection
        for subsection in subsections:
            pieces.append(subsection.originalline)
            pieces.append(subsection.content)
    # the groups that are not language sections
    for key in otherkeys:
        for subsection in langs[key]['subsections']:
            pieces.append(subsection.originalline)
            pieces.append(subsection.content)
    newtext = ''.join(pieces)

    # put the multi-line comments back
    for amarker, oldtext in zip(sectioned.markerforcomments, sectioned.removedcomments):
        newtext = newtext.replace(amarker, oldtext)
    # add the removed categories
    for acategory in sectioned.endcategories:
        newtext += '\n' + acategory
    return newtext

def remove_category_like_from_end(wikitext):
    """Strip the category-like links that close the text.

    A category-like link is ``[[prefix:something]]`` at the very end of the
    text. Links are removed one at a time from the end, each together with
    one newline before it, until the text no longer ends with one.

    Returns the tuple ``(text, categories)`` where ``categories`` lists the
    removed links in the order they had in the text, so that appending them
    back one per line reproduces the original order.
    """
    catre = re.compile(r'\[\[\s*?[^:\[]+?\s*?:[^\[]+?\]\]$')
    categories = []
    while True:
        thematch = catre.search(wikitext)
        if thematch is None:
            break
        categories.append(thematch.group())
        wikitext = wikitext[:thematch.start()]
        if wikitext.endswith('\n'):
            wikitext = wikitext[:-1]
    # The links were collected last-first; the original returned them that
    # way, which reversed their order when callers appended them back.
    categories.reverse()
    return wikitext, categories

def remove_noninline_comments(thetext):
    """Replace every comment that spans more than one line with a marker.

    Returns the tuple ``(text, markers, comments)``: the text with the
    markers in place, and two parallel lists holding each marker and the
    comment it stands for. Comments that fit on one line are left alone.
    """
    remaining = thetext
    markers = []
    comments = []
    for found in re.finditer(r'(<!--[\s\S]*?(-->){1})', thetext):
        comment = found.group(1)
        if '\n' not in comment:
            continue
        marker = get_a_marker(remaining)
        # only the first occurrence is replaced
        remaining = remaining.replace(comment, marker, 1)
        markers.append(marker)
        comments.append(comment)
    return remaining, markers, comments

def save_el_basics(modulelanguages,modulepartsofspeech):
    """Save the text of the two modules next to this file.

    Requires the latest text from el.wiktionary of:
        Module:Languages      (written to Languages.lua)
        Module:PartOfSpeech   (written to PartOfSpeech.lua)
    """
    outputs = (
        ("Languages.lua", modulelanguages),
        ("PartOfSpeech.lua", modulepartsofspeech),
    )
    for filename, luatext in outputs:
        with open(os.path.join(workingpath, filename), 'w', encoding='utf_8') as luafile:
            luafile.write(luatext)

def sort_el_transl_table(tablebody, languages):
    """Sort the lines of the body of one translations table.

    Empty lines and the ``{{μτφ-μέση`` line are dropped. A line that has the
    form of a translation line and whose language iso is a key of
    ``languages`` is kept as ``(sortforname of the language, line)``; every
    other line is collected as a bad line.

    Returns the tuple ``(sorted list of (key, line), list of bad lines)``.
    """
    sortable = []
    rejected = []
    for rawline in tablebody.splitlines():
        line = rawline.rstrip()
        if line.startswith('{{μτφ-μέση') or line == '':
            continue
        # does the line start with a language?
        hit = re.match(translinere, line)
        iso = hit.groupdict()['LANGISO'] if hit else None
        if hit and iso in languages:
            sortable.append((languages[iso]['sortforname'], line))
        else:
            rejected.append(line)
    sortable.sort()
    return sortable, rejected

def test():
    """Print a fixed confirmation line."""
    message = "basic python module for elwiktionary: TESTED OK"
    print(message)

def unsection_as_was(sectioned):
    """Return the text as it was, provided no changes have been made.

    Used to check the function Get_sectioned_title: the heading line and
    the content of every section are joined, the multi-line comments are
    put back in place of their markers and the removed categories are
    appended, one per line.
    """
    rebuilt = ''.join(part.originalline + part.content for part in sectioned.sections)
    # put the non-inline comments back
    for marker, comment in zip(sectioned.markerforcomments, sectioned.removedcomments):
        rebuilt = rebuilt.replace(marker, comment)
    # add the category-like links removed from the end
    return rebuilt + ''.join('\n' + category for category in sectioned.endcategories)

if __name__ == '__main__':
    # Manual check: section one page read from a local text file, report
    # what was found and print the page rebuilt with sorted languages.
    #pagetitle = "λάμα"
    pagetitle = "to"
    # read the content from somewhere,
    # e.g. from a text file named after the page.
    # The encoding is given explicitly, like every other file access in this
    # module, so that Greek text does not depend on the platform default.
    with open(pagetitle, 'rt', encoding='utf_8') as f:
        wikitext = f.read()
    # keep it for a future check
    oldtext = wikitext[:]
    languages, parts = get_el_basics()
    sections = Get_sectioned_title(pagetitle, wikitext, languages, parts)
    if len(sections.errors) > 0:
        print('sections.errors',sections.errors)
        print('Δεν θα γίνει περαιτέρω επεξεργασία.')
    else:
        for asection in sections.sections:
            print('asection.garbages',asection.garbages)
        print('sections.endcategories',sections.endcategories)
        print('sections.kleidaline',sections.kleidaline + "#")
        print('sections.kleidalinesection', sections.kleidalinesection)
        print("_________________________________________________________")

        # Get_lang_subsections only adds errors to "sections";
        # it is not supposed to change it anywhere else.
        # Also, "langs" now holds references into this new "sections".
        sections, langs = Get_lang_subsections(sections)
        if len(sections.errors) > 0:
            print('sections.errors',sections.errors)
            print('Δεν θα γίνει περαιτέρω επεξεργασία.')
        else:
            onlylangsections = [k for k in langs.keys() if langs[k]['iso']]
            for k in onlylangsections:
                print("lang",k, langs[k]['iso'])
            nonlangsections = [k for k in langs.keys() if k not in onlylangsections]
            # normally only the references section, provided it is between two equals signs
            for k in nonlangsections:
                print("nonlang",langs[k]['subsections'][0].originalline )
            print("_________________________________________________________")
            #check here for problems that can be fixed
            #ex. fix translations order using:
            #for asection in sections:
            #    if asection.titletemplate == 'μεταφράσεις':
            #            asection.content = fix_translation_section(asection.content, languages)
            #
            #even fix the subsection originaltitleline if has problems
            #remove κλειδα-ελλ and append it at the end if a greek lemma
            #etc. etc.
            #and then recreate with sorted langs
            #TODO: end lang separator can be removed in the above actions
            # and the check for existance can be removed in the function below
            # but the append langseparator must remain
            recreated = recreate_with_sorted_langs(sections, langs, languages, parts)
            print(recreated + "#") # the extra "#" makes trailing whitespace visible
    # check of el_sort_key
    # print(el_sort_key("ἀϋτή"))
    # print(el_sort_key("-Ἀθηνᾷ"))
# Source page: Χρήστης:Vanished user Xorisdtbdfgonugyfs/wikitools/basic.py
#
# Μενού πλοήγησης  (wiki page chrome: "Navigation menu")
#
# Αναζήτηση  (wiki page chrome: "Search")