braille-recognition-model / utils /angelina_utils.py
lep1's picture
Upload 14 files
c6c3369 verified
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Braille symbols declaration
"""
import numpy as np
def angelina_label_map():
    """Build the Braille label tables used by the Angelina label format.

    Every table maps a key made of digits 1-6 (presumably the raised Braille
    dot numbers of a cell -- confirm against the labelling tool) to the text
    label of that cell for one alphabet or symbol set.

    Returns:
        A dict from table code (``"SYM"``, ``"RU"``, ``"EN"``, ``"EN2"``,
        ``"GR"``, ``"LV"``, ``"PL"``, ``"DE"``, ``"UZ"``, ``"UZL"``,
        ``"NUM"``, ``"NUM_DENOMINATOR"``, ``"MATH_RU"``) to its table.
        ``"EN"`` and ``"EN2"`` refer to the same dict object. New dicts are
        created on every call.
    """
    # constants for special symbols label
    num_sign = "##"
    caps_sign = "CC"
    markout_sign = "XX"

    # general symbols common for various languages
    sym_map = {
        "256": ".",
        "2": ",",
        "25": ":",
        "26": "?",
        "23": ";",
        "235": "!",
        "2356": "()",  # postprocess to (, ). Labeled as ((, )), ()
        "126": "(",
        "345": ")",
        "36": "-",
        "34": "/",
        "3456": num_sign,
        "123456": markout_sign,
        # "6" ("en") and "46" ("EN") are not mapped here.
        # TODO: only for Russian?
    }

    # RU symbols
    alpha_map_RU = {
        "1": "а",
        "12": "б",
        "2456": "в",
        "1245": "г",
        "145": "д",
        "15": "е",
        "16": "ё",
        "245": "ж",
        "1356": "з",
        "24": "и",
        "12346": "й",
        "13": "к",
        "123": "л",
        "134": "м",
        "1345": "н",  # preprocess to № if followed by number
        "135": "о",
        "1234": "п",
        "1235": "р",
        "234": "с",
        "2345": "т",
        "136": "у",
        "124": "ф",
        "125": "х",
        "14": "ц",
        "12345": "ч",
        "156": "ш",
        "1346": "щ",
        "12356": "ъ",
        "2346": "ы",
        "23456": "ь",
        "246": "э",
        "1256": "ю",
        "1246": "я",
        "45": caps_sign,
        "236": "«",  # <<
        "356": "»",  # >>
        "4": "'",
        "456": "|",
        "346": "§",  # mark as &&
    }

    # UZ symbols: the RU table plus four extra letters
    alpha_map_UZ = {
        **alpha_map_RU,
        "1236": "ў",
        "13456": "қ",
        "12456": "ғ",
        "1456": "ҳ",
    }

    # EN symbols
    alpha_map_EN = {
        "1": "a",
        "12": "b",
        "14": "c",
        "145": "d",
        "15": "e",
        "124": "f",
        "1245": "g",
        "125": "h",
        "24": "i",
        "245": "j",
        "13": "k",
        "123": "l",
        "134": "m",
        "1345": "n",
        "135": "o",
        "1234": "p",
        "12345": "q",
        "1235": "r",
        "234": "s",
        "2345": "t",
        "136": "u",
        "1236": "v",
        "2456": "w",
        "1346": "x",
        "13456": "y",
        "1356": "z",
        # "6" is not mapped to caps_sign here.
        # TODO: duplicate of RU caps_sign
        "3": "'",
        "236": "«",  # <<
        "356": "»",  # >>
        # A plain '"' for "236" / "356" (marked as << / >>) is not used here.
    }

    # UZL symbols: the EN table with four entries overridden or added
    alpha_map_UZL = {
        **alpha_map_EN,
        "1236": "o`",
        "12456": "g`",
        "156": "sh",
        "12345": "ch",
    }

    # Greek letters
    alpha_map_GR = {
        "1": "α",
        "12": "β",
        "1245": "γ",
        "145": "δ",
        "15": "ε",
        "1356": "ζ",
        "345": "η",
        "1456": "θ",
        "24": "ι",
        "13": "κ",
        "123": "λ",
        "134": "μ",
        "1345": "ν",
        "1346": "ξ",
        "135": "ο",
        "1234": "π",
        "1235": "ρ",
        "234": "σ",
        "2345": "τ",
        "13456": "υ",
        "124": "φ",
        "125": "χ",
        "12346": "ψ",
        "245": "ω",
        "46": caps_sign,
        "5": "'",  # stress
        "126": "αι",
        "146": "ει",
        "246": "οι",
        "12456": "υι",
        "16": "αυ",
        "156": "ευ",
        "1256": "ηυ",
        "136": "ου",
    }

    # Latvian letters
    alpha_map_LV = {
        "1": "a",
        "16": "ā",
        "12": "b",
        "14": "c",
        "146": "č",
        "145": "d",
        "15": "e",
        "156": "ē",
        "124": "f",
        "1245": "g",
        "12456": "ģ",
        "125": "h",
        "24": "i",
        "246": "ī",
        "245": "j",
        "13": "k",
        "136": "ķ",
        "123": "l",
        "1236": "ļ",
        "134": "m",
        "1345": "n",
        "13456": "ņ",
        "135": "o",
        "1234": "p",
        "1235": "r",
        "234": "s",
        "2346": "š",
        "2345": "t",
        "34": "u",
        "346": "ū",
        "2456": "v",
        "345": "z",
        "3456": "ž",
        "46": caps_sign,
    }

    # PL symbols: the EN table plus Polish letters; "3" is overridden
    alpha_map_PL = {
        **alpha_map_EN,
        "16": "ą",
        "146": "ć",
        "156": "ę",
        "126": "ł",
        "1456": "ń",
        "346": "ó",
        "246": "ś",
        "2346": "ź",
        "12346": "ż",
        "3": ".",
        "256": "/",
        "34": "\\",
    }

    # DE symbols
    alpha_map_DE = {
        "236": '"',
        "356": '"',
        "35": "*",
        "235": "+",
        "2": ",",
        "36": "-",
        "3": ".",
        "346": "ie",
        "16": "au",
        "126": "eu",
        "146": "ei",
        "1456": "ch",
        "156": "sch",
        "1256": "ü",
        "246": "ö",
        "25": ":",
        "23": ";",
        "2356": "=",
        "26": "?",
        "345": "ä",
        "1": "a",
        "12": "b",
        "14": "c",
        "145": "d",
        "15": "e",
        "124": "f",
        "1245": "g",
        "125": "h",
        "24": "i",
        "245": "j",
        "13": "k",
        "123": "l",
        "134": "m",
        "1345": "n",
        "135": "o",
        "1234": "p",
        "12345": "q",
        "1235": "r",
        "234": "s",
        "2345": "t",
        "136": "u",
        "1236": "v",
        "2456": "w",
        "1346": "x",
        "13456": "y",
        "1356": "z",
        "34": "äu",
        "23456": "st",
        "2346": "ß",
        "46": caps_sign,
    }

    # Digit symbols (after num_sign)
    num_map = {
        "1": "1",
        "12": "2",
        "14": "3",
        "145": "4",
        "15": "5",
        "124": "6",
        "1245": "7",
        "125": "8",
        "24": "9",
        "245": "0",
    }

    # Digits in denominators of fraction
    num_denominator_map = {
        "2": "/1",
        "23": "/2",
        "25": "/3",
        "256": "/4",
        "26": "/5",
        "235": "/6",
        "2356": "/7",
        "236": "/8",
        "35": "/9",
        "356": "/0",  # postprocess num 0 /0 to %
    }

    # Symbols for Math Braille (in Russian braille, I suppose)
    math_RU = {
        "2": ",",  # decimal separator
        # postprocess to "." (thousand separator) if between digits,
        # else to * (multiplication).
        "3": "..",
        "235": "+",
        "36": "-",
        "236": "*",
        "256": "::",  # postprocess to ":" (division).
        # NOTE(review): this literal used to declare "246" and "135" twice:
        # as "<" / ">" at this position and as "{" / "}" after "]".
        # A dict literal keeps the last value, so "<" and ">" were silently
        # discarded. Only the effective entries are kept, at the position
        # where the keys were first inserted so iteration order is unchanged.
        # Confirm the intended codes for "<" and ">" before adding them back.
        "246": "{",
        "135": "}",
        "2356": "=",
        "126": "(",
        "345": ")",
        "12356": "[",
        "23456": "]",
        "456": "|",
        "6": "en",
        "46": "EN",
    }

    # Codes for dicts
    letter_dicts = {
        "SYM": sym_map,
        "RU": alpha_map_RU,
        "EN": alpha_map_EN,
        "EN2": alpha_map_EN,
        "GR": alpha_map_GR,
        "LV": alpha_map_LV,
        "PL": alpha_map_PL,
        "DE": alpha_map_DE,
        "UZ": alpha_map_UZ,
        "UZL": alpha_map_UZL,
        "NUM": num_map,
        "NUM_DENOMINATOR": num_denominator_map,
        "MATH_RU": math_RU,
    }
    return letter_dicts
def reversed_mapping(angelina_map=None):
    """Flatten a set of label tables into one ``label -> key`` lookup.

    Each table in ``angelina_map`` is inverted (values become keys) and the
    results are merged in the iteration order of ``angelina_map``. When the
    same label occurs more than once, the last occurrence wins, both inside
    a single table and across tables.

    Args:
        angelina_map: Dict of tables, each mapping a key to a label. When
            omitted or ``None``, the tables returned by
            ``angelina_label_map()`` are used.

    Returns:
        A new dict mapping each label to the key it was last declared with.
    """
    if angelina_map is None:
        # Resolved at call time rather than in the signature, so nothing is
        # computed at import time and no default object is shared.
        angelina_map = angelina_label_map()

    concat_dict = {}
    for single_dict in angelina_map.values():
        # Invert the table first, then merge; later tables overwrite
        # earlier ones.
        concat_dict.update({label: code for code, label in single_dict.items()})
    return concat_dict
# Labels resolved only after the mapping lookups fail. Written with explicit
# escapes because the Latin and Cyrillic spellings look identical on screen.
_SPECIAL_LABEL_CODES = {
    "CC": "46",  # capitalize, Latin letters
    "\u0421\u0421": "46",  # capitalize, Cyrillic look-alike letters
    "XX": "123456",  # markout, Latin letters
    "\u0425\u0425": "123456",  # markout, Cyrillic look-alike letters
    ">>": "356",
    "<<": "236",
}


def _default_reversed_mapping():
    """Return the result of ``reversed_mapping()``, computed on first use only."""
    cached = getattr(_default_reversed_mapping, "_cache", None)
    if cached is None:
        cached = reversed_mapping()
        _default_reversed_mapping._cache = cached
    return cached


def transform_angelina_label(russian_label, mapping=None):
    """Convert one text label into its key string.

    Resolution order:

    1. The whitespace-stripped label is looked up in ``mapping``.
    2. ``"~"`` characters are removed; if the result is numeric it is
       returned as-is.
    3. The lower-cased label is looked up in ``mapping``.
    4. The special labels ``CC``, ``XX`` (Latin or Cyrillic look-alikes),
       ``>>`` and ``<<`` are resolved from a fixed table.
    5. Anything else is printed to stdout and ``"46"`` is returned.

    Args:
        russian_label: Label text to convert.
        mapping: Dict from label to key. When omitted or ``None``, the
            lookup built by ``reversed_mapping()`` is used and reused across
            calls.

    Returns:
        The key string for the label, or ``"46"`` for unrecognised labels.
    """
    if mapping is None:
        # Resolved lazily so that importing the module does no work.
        mapping = _default_reversed_mapping()

    russian_label = russian_label.strip()
    if russian_label in mapping:
        return mapping[russian_label]

    # remove "~" from russian_label; a numeric remainder is returned unchanged
    russian_label = russian_label.replace("~", "")
    if russian_label.isnumeric():
        return russian_label

    # capitalized letters share the key of their lower-case form
    lowered = russian_label.lower()
    if lowered in mapping:
        return mapping[lowered]

    if russian_label in _SPECIAL_LABEL_CODES:
        return _SPECIAL_LABEL_CODES[russian_label]

    # Best-effort fallback kept as-is: report the unknown label and return "46".
    print(russian_label)
    return "46"