# Page-header residue ("Spaces: Sleeping") left over from the hosting page; not part of the program.
import random
import string
from difflib import SequenceMatcher

import epitran
import httpcore
from umsc import UgMultiScriptConverter

# Compatibility shim for googletrans 4.0.0-rc1.
# The placeholder has to exist BEFORE googletrans is imported: the commonly
# cited failure is an AttributeError on `httpcore.SyncHTTPTransport` raised
# while googletrans itself is loading, so patching afterwards is too late.
# NOTE(review): confirm against the httpcore version pinned for this app.
# The hasattr guard keeps a real `SyncHTTPTransport` intact when the installed
# httpcore still provides one.
if not hasattr(httpcore, 'SyncHTTPTransport'):
    setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')

from googletrans import LANGCODES, Translator  # noqa: E402  (must follow the shim)
## Global Vars

# Sample prompts offered to the user: short phrases ...
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
    "خۇش كېپسىز",
    "خەيرلىك كۈن",
    "خەير خوش",
]

# ... and full sentences.
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدە تۆت تەكچە، تۆتىلىسى تەك-تەكچە",
    "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.",
]

# Shared googletrans client; the selectable targets are the keys of LANGCODES.
translator = Translator()
translation_choices = list(LANGCODES)

# Uyghur script converters, one per direction ('UAS' <-> 'ULS').
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')

# Epitran instance for Uyghur written in Arabic script.
ipa_converter = epitran.Epitran('uig-Arab')
## Front-End Utils | |
def generate_short_text(script_choice):
    """Pick one entry of ``short_texts`` at random.

    When ``script_choice`` is ``"Uyghur Latin"`` the entry is passed through
    ``ug_arab_to_latn`` before being returned; for any other value it is
    returned exactly as stored.
    """
    sample = random.choice(short_texts)
    if script_choice == "Uyghur Latin":
        return ug_arab_to_latn(sample)
    return sample
def generate_long_text(script_choice):
    """Pick one entry of ``long_texts`` at random.

    The entry is converted with ``ug_arab_to_latn`` only when
    ``script_choice`` equals ``"Uyghur Latin"``; otherwise the stored text is
    returned unchanged.
    """
    wants_latin = script_choice == "Uyghur Latin"
    sentence = random.choice(long_texts)
    return ug_arab_to_latn(sentence) if wants_latin else sentence
def translate_text(input_text, script_choice, target_language):
    """
    Translate Uyghur text to the target language.

    Args:
        input_text: Uyghur text to translate. When ``script_choice`` is
            ``'Uyghur Latin'`` it is first converted with ``ug_latn_to_arab``.
        script_choice: Script of ``input_text``; only the value
            ``'Uyghur Latin'`` triggers a conversion.
        target_language: A key of ``LANGCODES``; the mapped value is passed to
            the translator as ``dest``.

    Returns:
        The ``text`` attribute of the translator result, or ``""`` when
        ``input_text`` is ``None``, empty, or whitespace only.

    Raises:
        KeyError: If ``target_language`` is not a key of ``LANGCODES``.
    """
    # Nothing to translate: skip the script converter and the translator call
    # entirely instead of handing them an empty payload.
    if not input_text or not input_text.strip():
        return ""

    if script_choice == 'Uyghur Latin':
        input_text = ug_latn_to_arab(input_text)

    result = translator.translate(input_text, src="ug", dest=LANGCODES[target_language])
    return result.text
## ASR Utils | |
def remove_punctuation(text):
    """Return ``text`` with every punctuation character deleted.

    The deleted set is ``string.punctuation`` plus the extra marks listed
    below; all other characters (including whitespace) are kept as-is.
    """
    # Marks that string.punctuation does not cover (custom Uyghur punctuation)
    uyghur_marks = "–؛;،؟?«»‹›−—¬”“"
    # A translation table mapping a code point to None deletes that character.
    deletions = dict.fromkeys(map(ord, string.punctuation + uyghur_marks))
    return text.translate(deletions)
# def load_and_resample_audio(audio_data, target_rate): | |
# """Load audio and resample based on target sample rate""" | |
# if isinstance(audio_data, tuple): | |
# # microphone | |
# sampling_rate, audio_input = audio_data | |
# audio_input = (audio_input / 32768.0).astype(np.float32) | |
# elif isinstance(audio_data, str): | |
# # file upload | |
# audio_input, sampling_rate = torchaudio.load(audio_data) | |
# else: | |
# return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)) | |
# # Resample if needed | |
# if sampling_rate != target_rate: | |
# resampler = torchaudio.transforms.Resample(sampling_rate, target_rate) | |
# audio_input = resampler(audio_input) | |
# return audio_input, target_rate | |
def calculate_pronunciation_accuracy(reference_text, output_text, script_choice):
    """
    Compare the ASR output with the reference text and build coloured feedback.

    Args:
        reference_text: Text the speaker was asked to read, written in the
            script named by ``script_choice``.
        output_text: ASR transcript. It is compared directly with the
            Arabic-script reference and fed to the 'uig-Arab' IPA converter,
            so it is presumably Arabic script -- NOTE(review): confirm
            against the ASR caller.
        script_choice: ``'Uyghur Latin'`` means the reference is converted to
            Arabic script for scoring and the feedback is shown in Latin
            script; any other value leaves the reference untouched.

    Returns:
        A tuple ``(reference_ipa, output_ipa, comparison_md, pronunciation_accuracy)``:
        the IPA transliterations of the punctuation-stripped reference and
        output, an HTML snippet showing the reference with matching characters
        in green and mismatched/missing ones in red, and the
        ``SequenceMatcher`` similarity ratio of the two cleaned texts as a
        percentage (0-100). The ratio is computed on the texts themselves,
        not on the IPA strings.
    """
    use_latin = script_choice == 'Uyghur Latin'

    # make sure input text is arabic script for IPA conversion
    if use_latin:
        reference_text = ug_latn_to_arab(reference_text)

    # Remove punctuation from both texts
    reference_text_clean = remove_punctuation(reference_text)
    output_text_clean = remove_punctuation(output_text)

    # Transliterate both texts to IPA
    reference_ipa = ipa_converter.transliterate(reference_text_clean)
    output_ipa = ipa_converter.transliterate(output_text_clean)

    # autojunk=False: the default heuristic treats frequently occurring
    # characters as junk once the second sequence reaches 200 items, which
    # distorts a character-level comparison of longer sentences.
    matcher = SequenceMatcher(None, reference_text_clean, output_text_clean, autojunk=False)
    pronunciation_accuracy = matcher.ratio() * 100

    # The feedback shows the reference in the user's script. Opcode indices
    # are only valid for the exact strings that were diffed, and the Latin
    # and Arabic renderings differ in length, so for Latin output the diff is
    # recomputed on the Latin forms instead of reusing Arabic-based indices.
    if use_latin:
        display_reference = ug_arab_to_latn(reference_text_clean)
        display_matcher = SequenceMatcher(
            None, display_reference, ug_arab_to_latn(output_text_clean), autojunk=False
        )
    else:
        display_reference = reference_text_clean
        display_matcher = matcher

    # Generate Markdown-compatible styled text
    parts = [
        "<h4>Pronunciation Feedback</h4>\n",  # Small header
        "<div style='margin-top: 10px;'>\n",  # Add some spacing
    ]
    for opcode, i1, i2, _j1, _j2 in display_matcher.get_opcodes():
        segment = display_reference[i1:i2]
        if not segment:
            # 'insert' opcodes carry no reference characters; skip the empty span.
            continue
        # 'equal' -> matching characters; replace/delete -> mismatched or missing
        colour = "green" if opcode == 'equal' else "red"
        parts.append(f'<span style="color: {colour}; font-size: 20px;">{segment}</span>')
    parts.append("</div>")

    return reference_ipa, output_ipa, "".join(parts), pronunciation_accuracy