Spaces:

lep1
/

braille-recognition-model

Runtime error

App Files Files Community

braille-recognition-model / utils /angelina_utils.py

lep1

Upload 14 files

c6c3369 verified 25 days ago

raw

history blame contribute delete

8.73 kB

	#!/usr/bin/env python
	# -*- coding: UTF-8 -*-
	"""
	Braille symbols declaration
	"""

	import numpy as np


	def angelina_label_map():
	    """Return the Braille label tables used by this module.

	    Every table maps a dot-pattern string (a string of dot numbers, digits
	    1-6, e.g. ``"145"``) to the text label of the symbol it stands for.

	    Returns:
	        dict: table code -> table. The codes are ``"SYM"``, ``"RU"``,
	        ``"EN"``, ``"EN2"`` (the same object as ``"EN"``), ``"GR"``,
	        ``"LV"``, ``"PL"``, ``"DE"``, ``"UZ"``, ``"UZL"``, ``"NUM"``,
	        ``"NUM_DENOMINATOR"`` and ``"MATH_RU"``. A fresh set of dicts is
	        built on every call.
	    """
	    # constants for special symbols label
	    num_sign = "##"
	    caps_sign = "CC"
	    markout_sign = "XX"

	    # general symbols common for various languages
	    sym_map = {
	        "256": ".",
	        "2": ",",
	        "25": ":",
	        "26": "?",
	        "23": ";",
	        "235": "!",
	        "2356": "()",  # postprocess to (, ). Labeled as ((, )), ()
	        "126": "(",
	        "345": ")",
	        "36": "-",
	        "34": "/",
	        "3456": num_sign,
	        "123456": markout_sign,
	        # '6': "en",
	        # '46': "EN", # TODO only for Russian ?
	    }

	    # RU symbols
	    alpha_map_RU = {
	        "1": "а",
	        "12": "б",
	        "2456": "в",
	        "1245": "г",
	        "145": "д",
	        "15": "е",
	        "16": "ё",
	        "245": "ж",
	        "1356": "з",
	        "24": "и",
	        "12346": "й",
	        "13": "к",
	        "123": "л",
	        "134": "м",
	        "1345": "н",  # preprocess to № if followed by number
	        "135": "о",
	        "1234": "п",
	        "1235": "р",
	        "234": "с",
	        "2345": "т",
	        "136": "у",
	        "124": "ф",
	        "125": "х",
	        "14": "ц",
	        "12345": "ч",
	        "156": "ш",
	        "1346": "щ",
	        "12356": "ъ",
	        "2346": "ы",
	        "23456": "ь",
	        "246": "э",
	        "1256": "ю",
	        "1246": "я",
	        "45": caps_sign,
	        "236": "«",  # <<
	        "356": "»",  # >>
	        "4": "'",
	        # Plain pipe: "\|" is not a valid escape sequence and would yield
	        # a two-character string (backslash + pipe).
	        "456": "|",
	        "346": "§",  # mark as &&
	    }

	    # UZ symbols: the RU table plus four extra letters
	    alpha_map_UZ = {
	        **alpha_map_RU,
	        "1236": "ў",
	        "13456": "қ",
	        "12456": "ғ",
	        "1456": "ҳ",
	    }

	    # EN symbols
	    alpha_map_EN = {
	        "1": "a",
	        "12": "b",
	        "14": "c",
	        "145": "d",
	        "15": "e",
	        "124": "f",
	        "1245": "g",
	        "125": "h",
	        "24": "i",
	        "245": "j",
	        "13": "k",
	        "123": "l",
	        "134": "m",
	        "1345": "n",
	        "135": "o",
	        "1234": "p",
	        "12345": "q",
	        "1235": "r",
	        "234": "s",
	        "2345": "t",
	        "136": "u",
	        "1236": "v",
	        "2456": "w",
	        "1346": "x",
	        "13456": "y",
	        "1356": "z",
	        # '6': caps_sign, # TODO duplicate of RU caps_sign
	        "3": "'",
	        "236": "«",  # <<
	        "356": "»",  # >>
	        # '236': '"', # mark as <<
	        # '356': '"', # mark as >>
	    }

	    # UZL symbols: the EN table with some patterns added or overridden
	    alpha_map_UZL = {
	        **alpha_map_EN,
	        "1236": "o`",
	        "12456": "g`",
	        "156": "sh",
	        "12345": "ch",
	    }

	    # Greek letters
	    alpha_map_GR = {
	        "1": "α",
	        "12": "β",
	        "1245": "γ",
	        "145": "δ",
	        "15": "ε",
	        "1356": "ζ",
	        "345": "η",
	        "1456": "θ",
	        "24": "ι",
	        "13": "κ",
	        "123": "λ",
	        "134": "μ",
	        "1345": "ν",
	        "1346": "ξ",
	        "135": "ο",
	        "1234": "π",
	        "1235": "ρ",
	        "234": "σ",
	        "2345": "τ",
	        "13456": "υ",
	        "124": "φ",
	        "125": "χ",
	        "12346": "ψ",
	        "245": "ω",
	        "46": caps_sign,
	        "5": "'",  # stress
	        "126": "αι",
	        "146": "ει",
	        "246": "οι",
	        "12456": "υι",
	        "16": "αυ",
	        "156": "ευ",
	        "1256": "ηυ",
	        "136": "ου",
	    }

	    # Latvian letters
	    alpha_map_LV = {
	        "1": "a",
	        "16": "ā",
	        "12": "b",
	        "14": "c",
	        "146": "č",
	        "145": "d",
	        "15": "e",
	        "156": "ē",
	        "124": "f",
	        "1245": "g",
	        "12456": "ģ",
	        "125": "h",
	        "24": "i",
	        "246": "ī",
	        "245": "j",
	        "13": "k",
	        "136": "ķ",
	        "123": "l",
	        "1236": "ļ",
	        "134": "m",
	        "1345": "n",
	        "13456": "ņ",
	        "135": "o",
	        "1234": "p",
	        "1235": "r",
	        "234": "s",
	        "2346": "š",
	        "2345": "t",
	        "34": "u",
	        "346": "ū",
	        "2456": "v",
	        "345": "z",
	        "3456": "ž",
	        "46": caps_sign,
	    }

	    # PL symbols: the EN table with some patterns added or overridden
	    alpha_map_PL = {
	        **alpha_map_EN,
	        "16": "ą",
	        "146": "ć",
	        "156": "ę",
	        "126": "ł",
	        "1456": "ń",
	        "346": "ó",
	        "246": "ś",
	        "2346": "ź",
	        "12346": "ż",
	        "3": ".",
	        "256": "/",
	        "34": "\\",
	    }

	    # DE symbols
	    alpha_map_DE = {
	        "236": '"',
	        "356": '"',
	        "35": "*",
	        "235": "+",
	        "2": ",",
	        "36": "-",
	        "3": ".",
	        "346": "ie",
	        "16": "au",
	        "126": "eu",
	        "146": "ei",
	        "1456": "ch",
	        "156": "sch",
	        "1256": "ü",
	        "246": "ö",
	        "25": ":",
	        "23": ";",
	        "2356": "=",
	        "26": "?",
	        "345": "ä",
	        "1": "a",
	        "12": "b",
	        "14": "c",
	        "145": "d",
	        "15": "e",
	        "124": "f",
	        "1245": "g",
	        "125": "h",
	        "24": "i",
	        "245": "j",
	        "13": "k",
	        "123": "l",
	        "134": "m",
	        "1345": "n",
	        "135": "o",
	        "1234": "p",
	        "12345": "q",
	        "1235": "r",
	        "234": "s",
	        "2345": "t",
	        "136": "u",
	        "1236": "v",
	        "2456": "w",
	        "1346": "x",
	        "13456": "y",
	        "1356": "z",
	        "34": "äu",
	        "23456": "st",
	        "2346": "ß",
	        "46": caps_sign,
	    }

	    # Digit symbols (after num_sign)
	    num_map = {
	        "1": "1",
	        "12": "2",
	        "14": "3",
	        "145": "4",
	        "15": "5",
	        "124": "6",
	        "1245": "7",
	        "125": "8",
	        "24": "9",
	        "245": "0",
	    }

	    # Digits in denominators of fraction
	    num_denominator_map = {
	        "2": "/1",
	        "23": "/2",
	        "25": "/3",
	        "256": "/4",
	        "26": "/5",
	        "235": "/6",
	        "2356": "/7",
	        "236": "/8",
	        "35": "/9",
	        "356": "/0",  # postprocess num 0 /0 to %
	    }

	    # Symbols for Math Braille (in Russian braille, I suppose)
	    math_RU = {
	        "2": ",",  # decimal separator
	        "3": "..",  # postprocess to "." (thousand separator) if between digits else to * (multiplication).
	        "235": "+",
	        "36": "-",
	        "236": "*",
	        "256": "::",  # postprocess to ":" (division).
	        # NOTE(review): "246" and "135" were listed twice in this literal
	        # ("<" / ">" here, "{" / "}" further down). A dict keeps only the
	        # last value per key, so "<" and ">" never reached the table. The
	        # shadowed entries are kept as comments so the conflict is visible.
	        # "246": "<",
	        # "135": ">",
	        "2356": "=",
	        "126": "(",
	        "345": ")",
	        "12356": "[",
	        "23456": "]",
	        "246": "{",
	        "135": "}",
	        "456": "|",  # plain pipe; "\|" is not a valid escape sequence
	        "6": "en",
	        "46": "EN",
	    }

	    # Codes for dicts
	    letter_dicts = {
	        "SYM": sym_map,
	        "RU": alpha_map_RU,
	        "EN": alpha_map_EN,
	        "EN2": alpha_map_EN,
	        "GR": alpha_map_GR,
	        "LV": alpha_map_LV,
	        "PL": alpha_map_PL,
	        "DE": alpha_map_DE,
	        "UZ": alpha_map_UZ,
	        "UZL": alpha_map_UZL,
	        "NUM": num_map,
	        "NUM_DENOMINATOR": num_denominator_map,
	        "MATH_RU": math_RU,
	    }

	    return letter_dicts


	def reversed_mapping(angelina_map=None):
	    """Flatten the per-table label maps into one symbol -> dot-pattern dict.

	    Args:
	        angelina_map: dict of table code -> {dot pattern: symbol}. When
	            omitted (or ``None``), the tables from ``angelina_label_map()``
	            are used. The default is resolved at call time rather than at
	            definition time, so no table is built on import and no dict is
	            shared between calls.

	    Returns:
	        dict: symbol -> dot pattern. Tables are processed in the iteration
	        order of ``angelina_map``; if a symbol occurs more than once (in
	        one table or across tables) the last dot pattern seen wins.
	    """
	    if angelina_map is None:
	        angelina_map = angelina_label_map()

	    concat_dict = {}
	    for single_dict in angelina_map.values():
	        # Invert each table entry; later entries overwrite earlier ones.
	        for dots, symbol in single_dict.items():
	            concat_dict[symbol] = dots

	    return concat_dict


	def transform_angelina_label(russian_label, mapping=None):
	    """Convert a text label into its Braille dot-pattern string.

	    Lookup order:
	      1. the stripped label as-is in ``mapping``;
	      2. with every ``"~"`` removed, a purely numeric label is returned
	         unchanged (the ``"~"`` stays removed for the later steps);
	      3. the lower-cased label in ``mapping`` (capitalised letters);
	      4. the special markers: capital sign, markout sign, ``">>"``, ``"<<"``;
	      5. anything else is printed to stdout and mapped to ``"46"``.

	    Args:
	        russian_label: label text; surrounding whitespace is ignored.
	        mapping: dict of symbol -> dot pattern. When omitted (or ``None``),
	            ``reversed_mapping()`` is built on first use and cached on this
	            function, so it is not computed at import time.

	    Returns:
	        str: the dot pattern, or the numeric label itself in case 2.
	    """
	    if mapping is None:
	        # Build the default table once and reuse it on later calls.
	        try:
	            mapping = transform_angelina_label._default_mapping
	        except AttributeError:
	            mapping = reversed_mapping()
	            transform_angelina_label._default_mapping = mapping

	    label = russian_label.strip()
	    if label in mapping:
	        return mapping[label]

	    label = label.replace("~", "")
	    if label.isnumeric():
	        return label

	    lowered = label.lower()
	    if lowered in mapping:
	        return mapping[lowered]

	    # The markers may be typed with Latin letters or with the visually
	    # identical Cyrillic ones (U+0421 "Es", U+0425 "Ha"); accept both.
	    # The original compared against the same "CC" literal twice.
	    if label in ("CC", "\u0421\u0421"):  # capitalize
	        return "46"
	    if label in ("XX", "\u0425\u0425"):  # markout
	        return "123456"
	    if label == ">>":
	        return "356"
	    if label == "<<":
	        return "236"

	    # Unknown label: report it and keep the original best-effort fallback.
	    print(label)
	    return "46"