Spaces:
Runtime error
Runtime error
def parse_verb_morphology(morph):
    """Decode a finite-verb morphology code into grammatical labels.

    The code is consumed left to right.  The first three characters are not
    inspected here; they are followed by a tense (``PRES``, ``IMPF``,
    ``PLUP``, ``PERF``, ``FUTP`` or ``FUT``), an optional voice (``ACTIVE`` or
    ``PASSIVE``), a three-letter mood, one person character and one number
    character, e.g. ``V11PERFACTIVEIND3S``.

    Any field that is unrecognised, or missing because the code is too short,
    is reported as the string ``'PROBLEM'`` rather than raising.

    Args:
        morph: The morphology code string.

    Returns:
        A dict with the keys ``pos``, ``tense``, ``voice``, ``mood``,
        ``person`` and ``number``.
    """
    word_pos = "Verb"
    four_letter_tenses = {
        "PRES": "Present",
        "IMPF": "Imperfect",
        "PLUP": "Pluperfect",
        "PERF": "Perfect",
    }
    moods = {
        "SUB": "Subjunctive",
        "IND": "Indicative",
        "IMP": "Imperative",
        "INF": "Infinitive",
    }
    persons = {
        "0": "Undeclined",
        "1": "First",
        "2": "Second",
        "3": "Third",
    }
    numbers = {
        "S": "Singular",
        "P": "Plural",
        # NOTE(review): 'X' is labelled 'Infinitive' rather than 'Unknown';
        # kept as-is because consumers may rely on the existing label.
        "X": "Infinitive",
    }

    # Tense consumption
    offset = 7
    tense_code = morph[3:7]
    if tense_code in four_letter_tenses:
        tense = four_letter_tenses[tense_code]
    elif tense_code == "FUTP" and morph[3:13] != "FUTPASSIVE":
        # "FUTP..." is the future perfect unless the P starts "PASSIVE".
        tense = "FuturePerfect"
    elif morph[3:6] == "FUT":
        # The plain future tag is only three characters long.
        offset = 6
        tense = "Future"
    elif morph == "V99XXX0X":
        # Placeholder code: only the tense is labelled; the remaining fields
        # fall through to 'PROBLEM' below.
        tense = "Undeclined"
    else:
        tense = "PROBLEM"

    # Voice consumption
    if morph[offset:offset + 6] == "ACTIVE":
        voice = "Active"
        offset += 6
    elif morph[offset:offset + 7] == "PASSIVE":
        voice = "Passive"
        offset += 7
    elif morph[offset:offset + 3] in moods:
        # Deponent verbs - such verbs occur in passive voice but are
        # translated in active voice.  No voice tag is present, so nothing is
        # consumed and the mood is read from the same position.
        voice = "Active"
    else:
        voice = "PROBLEM"

    # Mood consumption (always advances three characters, recognised or not)
    mood = moods.get(morph[offset:offset + 3], "PROBLEM")
    offset += 3

    # Person consumption; slicing yields '' past the end instead of raising.
    person = persons.get(morph[offset:offset + 1], "PROBLEM")
    offset += 1

    # Number consumption
    number = numbers.get(morph[offset:offset + 1], "PROBLEM")

    return {
        'pos': word_pos,
        'tense': tense,
        'voice': voice,
        'mood': mood,
        'person': person,
        'number': number
    }
def parse_verb_participle_morphology(morph):
    """Decode a participle morphology code into grammatical labels.

    The first six characters are not inspected here.  They are followed by a
    three-letter case, one number character, one gender character and a
    trailing participle-type tag, e.g. ``VPAR11NOMSMPRESACTIVEPPL``.

    Any field that is unrecognised, or missing because the code is too short,
    is reported as the string ``'PROBLEM'`` rather than raising.

    Args:
        morph: The morphology code string.

    Returns:
        A dict with the keys ``pos``, ``case``, ``number``, ``gender`` and
        ``participle_type``.
    """
    word_pos = "Participle"
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        # NOTE(review): 'X' is labelled 'Infinitive' rather than 'Unknown';
        # kept as-is because consumers may rely on the existing label.
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }
    participle_types = {
        'FUTPPL': 'FutureActive',
        'FUTACTIVEPPL': 'FutureActive',
        # NOTE(review): this label contains a space, unlike its siblings;
        # kept byte-identical because consumers may rely on it.
        'PRESPPL': 'Present Active',
        'PRESACTIVEPPL': 'Present Active',
        'PERFPPL': 'PerfectActive',
        'PERFACTIVEPPL': 'PerfectActive',
        'FUTPASSIVEPPL': 'FuturePassive',
        'PERFPASSIVEPPL': 'PerfectPassive',
    }

    offset = 6
    # Case consumption
    v_case = cases.get(morph[offset:offset + 3], "PROBLEM")
    offset += 3
    # Number consumption; slicing yields '' past the end instead of raising.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    # Gender consumption
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    # Everything that remains is the participle-type tag.
    participle_type = participle_types.get(morph[offset:], 'PROBLEM')

    return {
        'pos': word_pos,
        'case': v_case,
        'number': number,
        'gender': gender,
        'participle_type': participle_type
    }
def parse_noun_morphology(morph):
    """Decode a noun morphology code into grammatical labels.

    The first three characters are not inspected here.  They are followed by
    either a three-letter case or a single ``X`` (undeclined), then one number
    character and one gender character, e.g. ``N11NOMSM`` or ``N99XXN``.

    Any field that is unrecognised, or missing because the code is too short,
    is reported as the string ``'PROBLEM'`` rather than raising.

    Args:
        morph: The morphology code string.

    Returns:
        A dict with the keys ``pos``, ``case``, ``number`` and ``gender``.
    """
    word_pos = "Noun"
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        # NOTE(review): 'X' is labelled 'Infinitive' rather than 'Unknown';
        # kept as-is because consumers may rely on the existing label.
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }

    offset = 3
    # Case consumption
    v_case = cases.get(morph[offset:offset + 3])
    if v_case is None:
        if morph[offset:offset + 1] == 'X':
            # The undeclined marker is one character wide, so pre-compensate
            # for the fixed three-character advance below.
            offset -= 2
            v_case = 'Undeclined'
        else:
            v_case = "PROBLEM"
    offset += 3
    # Number consumption; slicing yields '' past the end instead of raising.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    # Gender consumption
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')

    return {
        'pos': word_pos,
        'case': v_case,
        'number': number,
        'gender': gender,
    }
def parse_adjective_morphology(morph):
    """Decode an adjective morphology code into grammatical labels.

    The first five characters are not inspected here.  They are followed by
    either a three-letter case or a single ``X`` (undeclined), one number
    character, one gender character and a trailing comparison tag (``POS``,
    ``COMP``, ``SUPER`` or ``X``), e.g. ``ADJ11NOMSMPOS``.

    Any field that is unrecognised, or missing because the code is too short,
    is reported as the string ``'PROBLEM'`` rather than raising.  When the
    comparison tag is not recognised the whole code is also printed.

    Args:
        morph: The morphology code string.

    Returns:
        A dict with the keys ``pos``, ``case``, ``number``, ``gender`` and
        ``comparison``.
    """
    word_pos = "Adjective"
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        # NOTE(review): 'X' is labelled 'Infinitive' rather than 'Unknown';
        # kept as-is because consumers may rely on the existing label.
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }
    comparisons = {
        'POS': "Positive",
        'COMP': "Comparative",
        'SUPER': "Superlative",
        'X': 'Unknown',
    }

    offset = 5
    # Case consumption
    v_case = cases.get(morph[offset:offset + 3])
    if v_case is None:
        if morph[offset:offset + 1] == 'X':
            # The undeclined marker is one character wide, so pre-compensate
            # for the fixed three-character advance below.
            offset -= 2
            v_case = 'Undeclined'
        else:
            v_case = "PROBLEM"
    offset += 3
    # Number consumption; slicing yields '' past the end instead of raising.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    # Gender consumption
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    # Comparison consumption: everything that remains is the comparison tag.
    comparison = comparisons.get(morph[offset:])
    if comparison is None:
        comparison = "PROBLEM"
        print(morph)

    return {
        'pos': word_pos,
        'case': v_case,
        'number': number,
        'gender': gender,
        'comparison': comparison
    }
# PRON31NOMPM | |
def parse_pronoun_morphology(morph):
    """Decode a pronoun morphology code into grammatical labels.

    The first six characters are not inspected here.  They are followed by
    either a three-letter case or a single ``X`` (undeclined), then one number
    character and one gender character, e.g. ``PRON31NOMPM``.

    Any field that is unrecognised, or missing because the code is too short,
    is reported as the string ``'PROBLEM'`` rather than raising.

    Args:
        morph: The morphology code string.

    Returns:
        A dict with the keys ``pos``, ``case``, ``number`` and ``gender``.
    """
    word_pos = "Pronoun"
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        # NOTE(review): 'X' is labelled 'Infinitive' rather than 'Unknown';
        # kept as-is because consumers may rely on the existing label.
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }

    offset = 6
    # Case consumption
    v_case = cases.get(morph[offset:offset + 3])
    if v_case is None:
        if morph[offset:offset + 1] == 'X':
            # The undeclined marker is one character wide, so pre-compensate
            # for the fixed three-character advance below.
            offset -= 2
            v_case = 'Undeclined'
        else:
            v_case = "PROBLEM"
    offset += 3
    # Number consumption; slicing yields '' past the end instead of raising.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    # Gender consumption
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')

    return {
        'pos': word_pos,
        'case': v_case,
        'number': number,
        'gender': gender,
    }
def parse_preposition_morphology(morph):
    """Decode a preposition morphology code into grammatical labels.

    Everything after the first four characters is taken as the case tag.
    ``ABL`` and ``ACC`` are recognised; any other tag is printed and reported
    as ``'PROBLEM'``.

    Args:
        morph: The morphology code string, e.g. ``PREPABL``.

    Returns:
        A dict with the keys ``pos`` and ``case``.
    """
    case_labels = {'ABL': 'Ablative', 'ACC': 'Accusative'}
    case_tag = morph[4:]
    if case_tag in case_labels:
        case_label = case_labels[case_tag]
    else:
        # Surface the unrecognised tag for debugging before flagging it.
        print(case_tag)
        case_label = 'PROBLEM'
    return {'pos': "Preposition", 'case': case_label}
def parse_adverb_morphology(morph):
    """Decode an adverb morphology code into grammatical labels.

    Everything after the first three characters is taken as the comparison
    tag.  An absent tag counts as positive; ``POS``, ``COMP``, ``SUPER`` and
    ``X`` are recognised; anything else is reported as ``'PROBLEM'`` and the
    whole code is printed.

    Args:
        morph: The morphology code string, e.g. ``ADVCOMP``.

    Returns:
        A dict with the keys ``pos`` and ``comparison``.
    """
    degree_labels = {
        'POS': "Positive",
        'COMP': "Comparative",
        'SUPER': "Superlative",
        'X': 'Unknown',
    }
    degree_tag = morph[3:]
    if not degree_tag:
        # A bare code with no comparison tag defaults to the positive degree.
        degree = "Positive"
    elif degree_tag in degree_labels:
        degree = degree_labels[degree_tag]
    else:
        degree = "PROBLEM"
        print(morph)
    return {'pos': "Adverb", 'comparison': degree}
# Illustrative code shape: SUPINE41ACCSN (8-character prefix, then case, number, gender)
def parse_supine_morphology(morph):
    """Decode a supine morphology code into grammatical labels.

    The first eight characters are not inspected here.  They are followed by
    either a three-letter case or a single ``X`` (undeclined), then one number
    character and one gender character.

    Any field that is unrecognised, or missing because the code is too short,
    is reported as the string ``'PROBLEM'`` rather than raising.

    Args:
        morph: The morphology code string.

    Returns:
        A dict with the keys ``pos``, ``case``, ``number`` and ``gender``.
    """
    word_pos = "Supine"
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        # NOTE(review): 'X' is labelled 'Infinitive' rather than 'Unknown';
        # kept as-is because consumers may rely on the existing label.
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }

    offset = 8
    # Case consumption
    v_case = cases.get(morph[offset:offset + 3])
    if v_case is None:
        if morph[offset:offset + 1] == 'X':
            # The undeclined marker is one character wide, so pre-compensate
            # for the fixed three-character advance below.
            offset -= 2
            v_case = 'Undeclined'
        else:
            v_case = "PROBLEM"
    offset += 3
    # Number consumption; slicing yields '' past the end instead of raising.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    # Gender consumption
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')

    return {
        'pos': word_pos,
        'case': v_case,
        'number': number,
        'gender': gender,
    }
def parse_morphology(morph):
    """Dispatch a morphology code to the parser for its part of speech.

    The part of speech is decided from the leading characters of the code.
    Checks run in a fixed order, so ``VPAR`` is tested before the generic
    verb rule and the single-letter ``N`` rule is tested before the longer
    ``PRON`` / ``PREP`` prefixes.

    Args:
        morph: The morphology code string.

    Returns:
        ``{}`` when the code is shorter than two characters, the dict built by
        the matching parser when one applies, a ``{'pos': ...}`` dict for
        interjections, enclitics, ``OTHER`` and ``CONJ``, and ``None`` when
        nothing matches.
    """
    if len(morph) < 2:
        return {}

    # Participle
    if morph.startswith("VPAR"):
        return parse_verb_participle_morphology(morph)
    # Adjective
    if morph.startswith('ADJ'):
        return parse_adjective_morphology(morph)
    # Adverb
    if morph.startswith('ADV'):
        return parse_adverb_morphology(morph)
    # Verb: 'V' followed by a digit from 0 to 8.
    # NOTE(review): '9' is not accepted here, so a code such as "V99XXX0X"
    # falls through to None - confirm that this is intended.
    if morph[0] == 'V' and '0' <= morph[1] <= '8':
        return parse_verb_morphology(morph)
    # Noun
    if morph[0] == 'N':
        return parse_noun_morphology(morph)
    # Pronoun
    if morph.startswith('PRON'):
        return parse_pronoun_morphology(morph)
    # Preposition
    if morph.startswith('PREP'):
        return parse_preposition_morphology(morph)
    # Supine
    if morph.startswith('SUPINE'):
        return parse_supine_morphology(morph)
    # Parts of speech that carry no further inflection data
    if morph.startswith('INTERJ'):
        return {'pos': 'Interjection'}
    if morph in ('TACKON', 'PREFIX', 'SUFFIX'):
        return {'pos': 'Enclitic'}
    if morph in ('OTHER', 'CONJ'):
        return {'pos': morph.capitalize()}
    return None
def morph_to_string(morph):
    """Flatten a parsed-morphology dict into a single label string.

    Inflected parts of speech become the part-of-speech name followed by
    their field values, joined with underscores (for example
    ``Noun_Nominative_Singular_Masculine``).  Uninflected ones map to a fixed
    label.

    Args:
        morph: A dict carrying a ``'pos'`` key plus the fields for that part
            of speech, or ``{}`` / ``None``.

    Returns:
        ``""`` for ``{}`` or ``None``, the label string for a known part of
        speech, and ``None`` for an unrecognised one.
    """
    if morph is None or morph == {}:
        return ""

    pos = morph['pos']

    # Parts of speech rendered as a constant label.
    constant_labels = {
        'Enclitic': 'Enclitic',
        'Other': 'OTHER',
        'Conj': 'Conjunction',
        'Interjection': 'Interjection',
    }
    if pos in constant_labels:
        return constant_labels[pos]

    # Parts of speech rendered as "<pos>_<field>_<field>...", in this order.
    field_order = {
        'Verb': ('tense', 'voice', 'mood', 'person', 'number'),
        'Participle': ('case', 'number', 'gender', 'participle_type'),
        'Noun': ('case', 'number', 'gender'),
        'Adjective': ('case', 'number', 'gender', 'comparison'),
        'Pronoun': ('case', 'number', 'gender'),
        'Preposition': ('case',),
        'Adverb': ('comparison',),
        'Supine': ('case', 'number', 'gender'),
    }
    fields = field_order.get(pos)
    if fields is None:
        return None
    return "_".join([pos] + [morph[name] for name in fields])
def simplify_form(morph):
    """Turn a raw morphology code into its flattened label string.

    Parses the code with ``parse_morphology`` and renders the result with
    ``morph_to_string``.

    Args:
        morph: The morphology code string.

    Returns:
        Whatever ``morph_to_string`` produces for the parsed code.
    """
    parsed = parse_morphology(morph)
    return morph_to_string(parsed)