File size: 3,872 Bytes
734a7ea
 
d29fa84
 
 
 
734a7ea
 
 
 
 
 
d29fa84
734a7ea
d29fa84
734a7ea
 
d29fa84
734a7ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d29fa84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import random
from umsc import UgMultiScriptConverter
import torchaudio
import string
import epitran
from difflib import SequenceMatcher

# Sample Uyghur prompts (Arabic script) offered to the user for reading aloud.
# `short_texts` holds single words; `long_texts` holds full phrases/sentences.
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
]
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدەپ تۆت تەكچە تۆتىلىسى تەكتەكچە",
]

# Front-End Utils
def generate_short_text(script_choice):
    """Return a randomly chosen short Uyghur text in the requested script.

    Args:
        script_choice (str): Requested script. The exact value
            "Uyghur Latin" converts the sample with a 'UAS' -> 'ULS'
            converter; any other value returns the stored text unchanged.

    Returns:
        str: One entry of ``short_texts``, converted only when
        "Uyghur Latin" was requested.
    """
    text = random.choice(short_texts)
    if script_choice != "Uyghur Latin":
        return text

    # Build the converter only on the Latin path so that requests for the
    # stored (Arabic-script) text do not pay for a converter they never use.
    ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
    return ug_arab_to_latn(text)

def generate_long_text(script_choice):
    """Return a randomly chosen long Uyghur text in the requested script.

    Args:
        script_choice (str): Requested script. The exact value
            "Uyghur Latin" converts the sample with a 'UAS' -> 'ULS'
            converter; any other value returns the stored text unchanged.

    Returns:
        str: One entry of ``long_texts``, converted only when
        "Uyghur Latin" was requested.
    """
    text = random.choice(long_texts)
    if script_choice != "Uyghur Latin":
        return text

    # Build the converter only on the Latin path so that requests for the
    # stored (Arabic-script) text do not pay for a converter they never use.
    ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
    return ug_arab_to_latn(text)

# ASR Utils
def load_and_resample_audio(file_path, target_rate):
    """Load an audio file and return it at ``target_rate``.

    The file is read with ``torchaudio.load``. When its native sample rate
    differs from ``target_rate`` the waveform is passed through
    ``torchaudio.transforms.Resample``; otherwise it is returned as loaded.

    Returns:
        tuple: ``(waveform, target_rate)``.
    """
    waveform, source_rate = torchaudio.load(file_path)

    # Nothing to do when the file is already at the requested rate.
    if source_rate == target_rate:
        return waveform, target_rate

    to_target_rate = torchaudio.transforms.Resample(source_rate, target_rate)
    return to_target_rate(waveform), target_rate

def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
    """
    Score how closely an ASR transcript matches a reference text at the IPA level.

    Both texts are stripped of punctuation via ``remove_punctuation``,
    transliterated to IPA with Epitran, and compared character by character
    with ``difflib.SequenceMatcher``.

    Args:
        reference_text (str): The ground truth text.
        output_text (str): The ASR output text.
        language_code (str): Epitran language code used for both texts
            (default is 'uig-Arab').

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_html, pronunciation_accuracy)``

        - reference_ipa (str): IPA transliteration of the reference text.
        - output_ipa (str): IPA transliteration of the ASR output text.
        - comparison_html (str): Concatenated ``<span>`` elements; segments
          equal in both IPA strings are green, all differing segments are red.
        - pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` of the
          two IPA strings, scaled to a 0-100 percentage.
    """
    ipa_converter = epitran.Epitran(language_code)

    # Punctuation is removed before transliteration so it cannot count as a
    # pronunciation mismatch.
    reference_ipa = ipa_converter.transliterate(remove_punctuation(reference_text))
    output_ipa = ipa_converter.transliterate(remove_punctuation(output_text))

    matcher = SequenceMatcher(None, reference_ipa, output_ipa)
    # ratio() is a similarity in [0, 1]; scale it to a percentage.
    pronunciation_accuracy = matcher.ratio() * 100

    # Render one coloured span per opcode and join once at the end.
    html_parts = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'equal':
            color, segment = 'green', reference_ipa[i1:i2]
        elif opcode == 'insert':
            # Present only in the ASR output: show the extra characters.
            color, segment = 'red', output_ipa[j1:j2]
        else:
            # 'replace' or 'delete': show the reference characters that
            # were mispronounced or missing.
            color, segment = 'red', reference_ipa[i1:i2]
        html_parts.append(f'<span style="color: {color}">{segment}</span>')
    comparison_html = "".join(html_parts)

    return reference_ipa, output_ipa, comparison_html, pronunciation_accuracy

def remove_punctuation(text):
    """Helper function to remove punctuation from text.

    Removes every character in ``string.punctuation`` (ASCII punctuation and
    symbols) and, in addition, every character whose Unicode general category
    starts with "P". The second rule covers non-ASCII punctuation such as the
    Arabic comma (U+060C), Arabic question mark (U+061F) and guillemets,
    which ``string.punctuation`` alone leaves in place.

    Args:
        text (str): Input text.

    Returns:
        str: ``text`` with punctuation characters removed; all other
        characters, including whitespace, are kept in order.
    """
    # Local import: keeps this helper self-contained without touching the
    # module's import block.
    import unicodedata

    ascii_punctuation = set(string.punctuation)
    return "".join(
        ch
        for ch in text
        if ch not in ascii_punctuation
        and not unicodedata.category(ch).startswith("P")
    )