File size: 4,268 Bytes
734a7ea
 
d29fa84
 
 
734a7ea
 
 
175f7a3
734a7ea
 
d29fa84
734a7ea
c0b2049
175f7a3
 
734a7ea
 
d29fa84
efdb44e
734a7ea
 
 
efdb44e
734a7ea
 
 
 
efdb44e
d29fa84
 
18f99c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d29fa84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43ce49e
d29fa84
 
 
 
 
c03de5c
 
d29fa84
43ce49e
 
d29fa84
 
aab3ee4
c03de5c
6a8d513
c03de5c
471da93
c03de5c
 
d29fa84
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import random
from umsc import UgMultiScriptConverter
import string
import epitran
from difflib import SequenceMatcher

# Practice prompts written in Uyghur Arabic script.
# `short_texts` holds single words / short phrases; `long_texts` holds longer
# sentences. One entry is drawn at random from each list by
# generate_short_text() and generate_long_text() respectively.
short_texts = [
    "سالام", "رەھمەت", "ياخشىمۇسىز", "خۇش كېپسىز", "خەيرلىك كۈن", "خەير خوش"
]
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدە تۆت تەكچە، تۆتىلىسى تەك-تەكچە", 
    "تۆۋەندە ئالىمنىڭ تەرجىمىھالى بىلەن تونۇشۇپ ئۆتەيلى.",
    "شېئىردىكى تۇيغۇ ئورنىنى تاپالمىغان ئىستىلىستىكىلىق ۋاسىتە كۆزگە چېلىقمايدۇ."
]

# Front-End Utils
# Shared converter instance; it is called on a prompt string when the
# "Uyghur Latin" script is requested by the text generators below.
# NOTE(review): 'UAS' -> 'ULS' presumably stands for Uyghur Arabic Script ->
# Uyghur Latin Script; confirm against the umsc documentation.
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
def generate_short_text(script_choice):
    """Return one randomly chosen entry of ``short_texts``.

    Args:
        script_choice (str): Requested script. When it equals
            "Uyghur Latin" the chosen entry is passed through
            ``ug_arab_to_latn``; any other value returns the entry unchanged.

    Returns:
        The selected text, converted only for the "Uyghur Latin" choice.
    """
    picked = random.choice(short_texts)
    if script_choice != "Uyghur Latin":
        return picked
    return ug_arab_to_latn(picked)

def generate_long_text(script_choice):
    """Return one randomly chosen entry of ``long_texts``.

    Args:
        script_choice (str): Requested script. When it equals
            "Uyghur Latin" the chosen entry is passed through
            ``ug_arab_to_latn``; any other value returns the entry unchanged.

    Returns:
        The selected text, converted only for the "Uyghur Latin" choice.
    """
    picked = random.choice(long_texts)
    if script_choice != "Uyghur Latin":
        return picked
    return ug_arab_to_latn(picked)

# ASR Utils
# def load_and_resample_audio(audio_data, target_rate):
#     """Load audio and resample based on target sample rate"""
#     if isinstance(audio_data, tuple):
#         # microphone
#         sampling_rate, audio_input = audio_data
#         audio_input = (audio_input / 32768.0).astype(np.float32)
#     elif isinstance(audio_data, str):
#         # file upload
#         audio_input, sampling_rate = torchaudio.load(audio_data)
#     else: 
#         return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
#     # Resample if needed
#     if sampling_rate != target_rate:
#         resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
#         audio_input = resampler(audio_input)

#     return audio_input, target_rate

# Epitran converters keyed by language code, so each converter is constructed
# only once instead of on every scoring call.
_IPA_CONVERTERS = {}


def _get_ipa_converter(language_code):
    """Return the cached Epitran converter for *language_code*, creating it on first use."""
    converter = _IPA_CONVERTERS.get(language_code)
    if converter is None:
        converter = epitran.Epitran(language_code)
        _IPA_CONVERTERS[language_code] = converter
    return converter


def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
    """
    Score an ASR transcript against a reference text and build a visual diff.

    Both texts are first cleaned with ``remove_punctuation``. The cleaned
    texts are transliterated to IPA with Epitran for display, but the
    accuracy score and the coloured comparison are computed on the cleaned
    *orthographic* texts (not on the IPA strings) using
    ``difflib.SequenceMatcher``.

    Args:
        reference_text (str): The ground truth text.
        output_text (str): The ASR output text.
        language_code (str): Epitran language code (default is 'uig-Arab').

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_md, pronunciation_accuracy)``

        * reference_ipa (str): IPA transliteration of the cleaned reference text.
        * output_ipa (str): IPA transliteration of the cleaned ASR output.
        * comparison_md (str): HTML ``<div>`` showing the reference text, with
          matched segments in blue and mismatched/missing segments in orange.
        * pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` of the two
          cleaned texts, expressed as a percentage (0-100).
    """
    ipa_converter = _get_ipa_converter(language_code)

    # Remove punctuation from both texts
    reference_text_clean = remove_punctuation(reference_text)
    output_text_clean = remove_punctuation(output_text)

    # Transliterate both texts to IPA (returned for display only)
    reference_ipa = ipa_converter.transliterate(reference_text_clean)
    output_ipa = ipa_converter.transliterate(output_text_clean)

    # Similarity of the cleaned texts: 2 * matches / total characters, as a percentage
    matcher = SequenceMatcher(None, reference_text_clean, output_text_clean)
    pronunciation_accuracy = matcher.ratio() * 100

    # Colour each reference segment by whether the ASR output matched it.
    spans = []
    for opcode, i1, i2, _j1, _j2 in matcher.get_opcodes():
        ref_segment = reference_text_clean[i1:i2]
        if not ref_segment:
            # 'insert' opcodes cover text that exists only in the ASR output;
            # there is no reference segment to show, so skip the empty span.
            continue
        color = "blue" if opcode == 'equal' else "orange"
        spans.append(f'<span style="color: {color};">{ref_segment}</span>')

    comparison_md = f"<div>{''.join(spans)}</div>"

    return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy

def remove_punctuation(text):
    """Return *text* with punctuation characters removed.

    Every character in ``string.punctuation`` (ASCII punctuation and symbols)
    is stripped, as is any character whose Unicode general category is a
    punctuation category (``P*``). The latter covers non-ASCII marks such as
    the Arabic comma, semicolon and question mark and guillemets, which
    ``string.punctuation`` alone does not contain.

    Args:
        text (str): Input text.

    Returns:
        str: The text without punctuation; letters, digits and whitespace
        are left untouched.
    """
    # Imported locally so this helper does not depend on a new module-level import.
    import unicodedata

    ascii_punctuation = frozenset(string.punctuation)
    return "".join(
        ch
        for ch in text
        if ch not in ascii_punctuation
        and not unicodedata.category(ch).startswith("P")
    )