File size: 5,060 Bytes
734a7ea 8e57d14 d29fa84 734a7ea 8e57d14 81cf146 f0e249a 734a7ea 175f7a3 734a7ea d29fa84 734a7ea c0b2049 77775b5 734a7ea 8e57d14 f0e249a efdb44e f0e249a 6b61ee0 f0e249a 734a7ea efdb44e 734a7ea efdb44e d29fa84 8e57d14 f0e249a 8e57d14 f0e249a 18f99c6 d29fa84 8e57d14 d29fa84 f0e249a 6b61ee0 8e57d14 6b61ee0 d29fa84 43ce49e d29fa84 534d3b0 8e57d14 534d3b0 ff91a06 d29fa84 43ce49e d29fa84 8e57d14 c03de5c 77775b5 ff91a06 c03de5c d29fa84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import random
from umsc import UgMultiScriptConverter
from googletrans import Translator, LANGCODES
import string
import epitran
from difflib import SequenceMatcher
# For googletrans 4.0.0-rc1
import httpcore
setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
## Global Vars
# Sample Uyghur sentences (Arabic script) offered to the user as reading prompts.
# short_texts holds greetings and short phrases; long_texts holds full sentences.
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
    "خۇش كېپسىز",
    "خەيرلىك كۈن",
    "خەير خوش",
]
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدە تۆت تەكچە، تۆتىلىسى تەك-تەكچە",
    "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.",
]
# Shared googletrans client, reused by every translation request
translator = Translator()
# Keys of LANGCODES, exposed as the selectable target languages
translation_choices = list(LANGCODES)
# Uyghur script converters: 'UAS' -> 'ULS' and the reverse direction
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
# Epitran transliterator for Uyghur written in Arabic script ('uig-Arab')
ipa_converter = epitran.Epitran('uig-Arab')
## Front-End Utils
def generate_short_text(script_choice):
    """Return a randomly chosen entry of short_texts in the requested script.

    The stored samples are passed through ug_arab_to_latn when
    script_choice is "Uyghur Latin"; any other value returns the sample
    unchanged.
    """
    sample = random.choice(short_texts)
    if script_choice == "Uyghur Latin":
        return ug_arab_to_latn(sample)
    return sample
def generate_long_text(script_choice):
    """Return a randomly chosen entry of long_texts in the requested script.

    The stored samples are passed through ug_arab_to_latn when
    script_choice is "Uyghur Latin"; any other value returns the sample
    unchanged.
    """
    sample = random.choice(long_texts)
    if script_choice == "Uyghur Latin":
        return ug_arab_to_latn(sample)
    return sample
def translate_text(input_text, script_choice, target_language):
    """
    Translate Uyghur text into the requested target language.

    input_text is converted with ug_latn_to_arab first when script_choice
    is 'Uyghur Latin', so the translator always receives the converted
    form. target_language is looked up in LANGCODES to obtain the
    destination code. Returns the `.text` attribute of the translation
    result.
    """
    source_text = input_text
    if script_choice == 'Uyghur Latin':
        source_text = ug_latn_to_arab(input_text)
    destination_code = LANGCODES[target_language]
    result = translator.translate(source_text, src="ug", dest=destination_code)
    return result.text
## ASR Utils
def remove_punctuation(text):
    """Return text with every punctuation character deleted.

    The characters removed are string.punctuation plus a set of extra
    marks (Arabic-script comma, semicolon and question mark, guillemets,
    dashes and curly quotes) that occur in Uyghur text.
    """
    uyghur_marks = "–؛;،؟?«»‹›−—¬”“"  # Additional custom uyghur punctuation
    # Mapping a code point to None tells str.translate to drop that character.
    deletion_table = dict.fromkeys(map(ord, string.punctuation + uyghur_marks))
    return text.translate(deletion_table)
# def load_and_resample_audio(audio_data, target_rate):
# """Load audio and resample based on target sample rate"""
# if isinstance(audio_data, tuple):
# # microphone
# sampling_rate, audio_input = audio_data
# audio_input = (audio_input / 32768.0).astype(np.float32)
# elif isinstance(audio_data, str):
# # file upload
# audio_input, sampling_rate = torchaudio.load(audio_data)
# else:
# return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
# # Resample if needed
# if sampling_rate != target_rate:
# resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
# audio_input = resampler(audio_input)
# return audio_input, target_rate
def calculate_pronunciation_accuracy(reference_text, output_text, script_choice):
    """
    Score ASR output against a reference text and build coloured feedback.

    Args:
        reference_text: The text the speaker was asked to read, written in
            the script named by script_choice.
        output_text: The ASR transcription. It is compared directly with the
            Arabic-script form of the reference, so it is presumably in
            Arabic script as well -- confirm with the caller.
        script_choice: 'Uyghur Latin' if the reference is in Latin script;
            any other value leaves the reference unconverted.

    Returns:
        A tuple (reference_ipa, output_ipa, comparison_md, pronunciation_accuracy):
        the Epitran transliterations of the punctuation-free reference and
        output, an HTML snippet showing the reference with matched characters
        in green and unmatched ones in red, and SequenceMatcher's ratio
        expressed as a percentage (0-100).
    """
    show_latin = script_choice == 'Uyghur Latin'

    # make sure input text is arabic script for IPA conversion
    if show_latin:
        reference_text = ug_latn_to_arab(reference_text)

    # Remove punctuation from both texts
    reference_text_clean = remove_punctuation(reference_text)
    output_text_clean = remove_punctuation(output_text)

    # Transliterate both texts to IPA
    reference_ipa = ipa_converter.transliterate(reference_text_clean)
    output_ipa = ipa_converter.transliterate(output_text_clean)

    # NOTE(review): the ratio and the feedback are computed on the cleaned
    # text, not on the IPA strings above -- confirm this is intended.
    matcher = SequenceMatcher(None, reference_text_clean, output_text_clean)
    pronunciation_accuracy = matcher.ratio() * 100  # fraction -> percentage

    # The opcode indices refer to the Arabic-script reference. Slice that
    # string first and only then convert each slice for display: converting
    # the whole reference to Latin beforehand changes its length and makes
    # the indices point at the wrong characters.
    spans = []
    for opcode, i1, i2, _j1, _j2 in matcher.get_opcodes():
        ref_segment = reference_text_clean[i1:i2]
        if show_latin and ref_segment:
            # Segment-wise conversion; a slice that cuts a word may render
            # slightly differently from a whole-word conversion.
            ref_segment = ug_arab_to_latn(ref_segment)
        if opcode == 'equal':  # Matching characters
            spans.append(f'<span style="color: green; font-size: 20px;">{ref_segment}</span>')
        elif opcode in ('replace', 'delete', 'insert'):  # Mismatched or missing
            spans.append(f'<span style="color: red; font-size: 20px;">{ref_segment}</span>')

    # Generate Markdown-compatible styled text
    comparison_md = (
        "<h4>Pronunciation Feedback</h4>\n"  # Small header
        "<div style='margin-top: 10px;'>\n"  # Add some spacing
        + "".join(spans)
        + "</div>"
    )

    return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy
|