"""Front-end text generation and ASR pronunciation-scoring utilities for Uyghur."""
import random
from umsc import UgMultiScriptConverter
import torchaudio
import string
import epitran
from difflib import SequenceMatcher
# Sample Uyghur (Arabic script) prompts.
# short_texts: single-word prompts.
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
]

# long_texts: full-sentence prompts.
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدەپ تۆت تەكچە تۆتىلىسى تەكتەكچە",
]
# Front-End Utils
def generate_short_text(script_choice):
    """Pick a random entry from ``short_texts``, optionally in Latin script.

    Args:
        script_choice (str): When equal to "Uyghur Latin", the chosen text is
            passed through ``UgMultiScriptConverter('UAS', 'ULS')``; any other
            value returns the text exactly as stored.

    Returns:
        str: The randomly chosen short text.
    """
    text = random.choice(short_texts)
    if script_choice != "Uyghur Latin":
        return text
    # Build the converter only on the path that actually uses it.
    ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
    return ug_arab_to_latn(text)
def generate_long_text(script_choice):
    """Pick a random entry from ``long_texts``, optionally in Latin script.

    Args:
        script_choice (str): When equal to "Uyghur Latin", the chosen text is
            passed through ``UgMultiScriptConverter('UAS', 'ULS')``; any other
            value returns the text exactly as stored.

    Returns:
        str: The randomly chosen long text.
    """
    text = random.choice(long_texts)
    if script_choice != "Uyghur Latin":
        return text
    # Build the converter only on the path that actually uses it.
    ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
    return ug_arab_to_latn(text)
# ASR Utils
def load_and_resample_audio(file_path, target_rate):
    """Read an audio file and resample it when its rate differs from ``target_rate``.

    Args:
        file_path: Location of the audio file, handed to ``torchaudio.load``.
        target_rate: Sample rate the returned audio should have.

    Returns:
        tuple: ``(audio, target_rate)`` where ``audio`` is the loaded data,
        resampled only if the file's own rate was not already ``target_rate``.
    """
    waveform, source_rate = torchaudio.load(file_path)

    # Already at the requested rate: hand the data back untouched.
    if source_rate == target_rate:
        return waveform, target_rate

    to_target_rate = torchaudio.transforms.Resample(source_rate, target_rate)
    return to_target_rate(waveform), target_rate
def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
    """
    Calculate pronunciation accuracy between reference and ASR output text using Epitran.

    Both texts have punctuation removed with ``remove_punctuation``, are
    transliterated to IPA, and the two IPA strings are then compared character
    by character with ``difflib.SequenceMatcher``.

    Args:
        reference_text (str): The ground truth text.
        output_text (str): The ASR output text.
        language_code (str): Epitran language code (default is 'uig-Arab',
            i.e. Uyghur in Arabic script).

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_html, pronunciation_accuracy)``

        - reference_ipa (str): IPA transliteration of the reference text.
        - output_ipa (str): IPA transliteration of the output text.
        - comparison_html (str): Sequence of ``<span>`` elements; matching
          segments are green, mismatched/missing/extra segments are red.
        - pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` scaled
          to a percentage (0-100).
    """
    ipa_converter = epitran.Epitran(language_code)

    # Strip punctuation first so it does not count as a pronunciation mismatch.
    reference_ipa = ipa_converter.transliterate(remove_punctuation(reference_text))
    output_ipa = ipa_converter.transliterate(remove_punctuation(output_text))

    # autojunk=False: the default heuristic treats frequently repeated items as
    # junk once the second sequence reaches 200 items, which skews
    # character-level ratios for longer texts.
    matcher = SequenceMatcher(None, reference_ipa, output_ipa, autojunk=False)
    pronunciation_accuracy = matcher.ratio() * 100

    html_parts = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'equal':
            # Matching characters.
            color, segment = "green", reference_ipa[i1:i2]
        elif opcode == 'insert':
            # Characters present only in the output: show what was added.
            color, segment = "red", output_ipa[j1:j2]
        else:
            # 'replace' or 'delete': show the reference characters that were
            # mispronounced or missed.
            color, segment = "red", reference_ipa[i1:i2]
        html_parts.append(f'<span style="color: {color}">{segment}</span>')
    comparison_html = "".join(html_parts)

    return reference_ipa, output_ipa, comparison_html, pronunciation_accuracy
def remove_punctuation(text):
    """Return ``text`` with every punctuation character removed.

    Removes everything in ``string.punctuation`` (ASCII punctuation and
    symbols) and, in addition, any character whose Unicode general category is
    a punctuation class (``P*``). The latter covers marks such as the Arabic
    comma, semicolon and question mark, which ``string.punctuation`` does not
    include.

    Args:
        text (str): Input text.

    Returns:
        str: ``text`` without punctuation; all other characters, including
        whitespace, are kept in their original order.
    """
    # Function-scope import keeps this helper self-contained.
    import unicodedata

    ascii_punctuation = set(string.punctuation)
    return "".join(
        ch
        for ch in text
        if ch not in ascii_punctuation
        and not unicodedata.category(ch).startswith("P")
    )