File size: 5,060 Bytes
734a7ea
 
8e57d14
d29fa84
 
 
734a7ea
8e57d14
 
 
81cf146
f0e249a
734a7ea
 
175f7a3
734a7ea
 
d29fa84
734a7ea
c0b2049
77775b5
734a7ea
 
8e57d14
 
 
 
f0e249a
efdb44e
f0e249a
 
 
6b61ee0
f0e249a
 
734a7ea
 
 
efdb44e
734a7ea
 
 
 
efdb44e
d29fa84
8e57d14
 
 
 
 
 
 
 
f0e249a
 
 
 
8e57d14
f0e249a
 
 
 
18f99c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d29fa84
8e57d14
d29fa84
 
 
f0e249a
6b61ee0
8e57d14
6b61ee0
d29fa84
 
 
 
 
 
 
 
 
 
43ce49e
d29fa84
 
 
 
 
534d3b0
8e57d14
534d3b0
ff91a06
 
 
d29fa84
43ce49e
 
d29fa84
 
8e57d14
c03de5c
77775b5
ff91a06
c03de5c
 
d29fa84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import random
from umsc import UgMultiScriptConverter
from googletrans import Translator, LANGCODES
import string
import epitran
from difflib import SequenceMatcher

# For googletrans 4.0.0-rc1
# Compatibility shim: defines the attribute `SyncHTTPTransport` on the
# httpcore module, bound to a placeholder string (not a real transport class).
# NOTE(review): presumably googletrans references httpcore.SyncHTTPTransport
# and some httpcore releases do not provide it -- confirm against the pinned
# package versions.
# NOTE(review): this runs after `from googletrans import ...` above; if the
# attribute is needed while googletrans itself is being imported, the shim
# would have to come before that import -- verify.
import httpcore
setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy') 

## Global Vars
# Uyghur prompt texts. generate_short_text() and generate_long_text() each
# pick one entry at random from their list and convert it with
# ug_arab_to_latn when the Latin script is requested, so the entries here are
# kept in Arabic script.
# Short prompts: single words and brief phrases.
short_texts = [
    "سالام", "رەھمەت", "ياخشىمۇسىز", "خۇش كېپسىز", "خەيرلىك كۈن", "خەير خوش"
]
# Long prompts: full sentences.
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدە تۆت تەكچە، تۆتىلىسى تەك-تەكچە", 
    "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
]

# Shared googletrans client; translate_text() sends every request through it
translator = Translator()
# Selectable target languages: the keys of googletrans' LANGCODES table
translation_choices = list(LANGCODES)

# Uyghur script converters, one per direction ('UAS' <-> 'ULS')
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')

# Epitran instance used to transliterate Arabic-script Uyghur into IPA
ipa_converter = epitran.Epitran('uig-Arab')

## Front-End Utils
def generate_short_text(script_choice):
    """Pick a random entry from ``short_texts`` to use as a prompt.

    When ``script_choice`` is ``"Uyghur Latin"`` the chosen entry is passed
    through ``ug_arab_to_latn`` before being returned; for any other value it
    is returned exactly as stored.
    """
    prompt = random.choice(short_texts)
    if script_choice != "Uyghur Latin":
        return prompt
    return ug_arab_to_latn(prompt)

def generate_long_text(script_choice):
    """Pick a random entry from ``long_texts`` to use as a prompt.

    When ``script_choice`` is ``"Uyghur Latin"`` the chosen entry is passed
    through ``ug_arab_to_latn`` before being returned; for any other value it
    is returned exactly as stored.
    """
    prompt = random.choice(long_texts)
    if script_choice != "Uyghur Latin":
        return prompt
    return ug_arab_to_latn(prompt)

def translate_text(input_text, script_choice, target_language):
    """
    Translate Uyghur text into ``target_language`` with the shared translator.

    ``input_text`` is first converted with ``ug_latn_to_arab`` when
    ``script_choice`` is ``'Uyghur Latin'``, so the translator is always
    given the converted (Arabic-script) form in that case.
    ``target_language`` is looked up in ``LANGCODES`` to obtain the
    destination code, so it must be one of that table's keys.

    Returns the ``.text`` attribute of the translation result.
    """
    source_text = (
        ug_latn_to_arab(input_text)
        if script_choice == 'Uyghur Latin'
        else input_text
    )
    destination_code = LANGCODES[target_language]
    result = translator.translate(source_text, src="ug", dest=destination_code)
    return result.text

## ASR Utils
def remove_punctuation(text):
    """Return ``text`` with every punctuation character deleted.

    The characters removed are those in ``string.punctuation`` plus a set of
    additional marks (Arabic-script comma/semicolon/question mark, guillemets,
    dashes and curly quotes). All other characters, including whitespace,
    are kept as they are.
    """
    # Marks that string.punctuation does not cover
    extra_marks = "–؛;،؟?«»‹›−—¬”“"
    # A mapping whose values are all None tells str.translate to drop the keys
    deletion_table = str.maketrans(dict.fromkeys(string.punctuation + extra_marks))
    return text.translate(deletion_table)

# def load_and_resample_audio(audio_data, target_rate):
#     """Load audio and resample based on target sample rate"""
#     if isinstance(audio_data, tuple):
#         # microphone
#         sampling_rate, audio_input = audio_data
#         audio_input = (audio_input / 32768.0).astype(np.float32)
#     elif isinstance(audio_data, str):
#         # file upload
#         audio_input, sampling_rate = torchaudio.load(audio_data)
#     else: 
#         return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
#     # Resample if needed
#     if sampling_rate != target_rate:
#         resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
#         audio_input = resampler(audio_input)

#     return audio_input, target_rate

def calculate_pronunciation_accuracy(reference_text, output_text, script_choice):
    """
    Score ASR output against a reference text and build HTML feedback.

    Args:
        reference_text: The text the speaker was asked to read, written in the
            script named by ``script_choice``.
        output_text: The ASR transcription. It is fed to the Arabic-script IPA
            converter unchanged, so it is treated as Arabic-script Uyghur.
        script_choice: ``'Uyghur Latin'`` when ``reference_text`` is in Latin
            script; any other value leaves the reference unconverted.

    Returns:
        A 4-tuple ``(reference_ipa, output_ipa, comparison_md,
        pronunciation_accuracy)``:

        * ``reference_ipa`` / ``output_ipa`` -- IPA transliterations of the
          punctuation-stripped reference and ASR output.
        * ``comparison_md`` -- an HTML snippet showing the reference text in
          the user's chosen script, matched stretches in green and everything
          else in red.
        * ``pronunciation_accuracy`` -- ``SequenceMatcher.ratio()`` of the two
          punctuation-stripped texts, as a percentage (0-100). Note that the
          score is computed on the cleaned text, not on the IPA strings.
    """
    wants_latin = script_choice == 'Uyghur Latin'

    # The IPA converter is configured for Arabic script, so normalise first
    if wants_latin:
        reference_text = ug_latn_to_arab(reference_text)

    # Punctuation is ignored for both scoring and IPA conversion
    reference_text_clean = remove_punctuation(reference_text)
    output_text_clean = remove_punctuation(output_text)

    reference_ipa = ipa_converter.transliterate(reference_text_clean)
    output_ipa = ipa_converter.transliterate(output_text_clean)

    # Similarity of the cleaned texts, expressed as a percentage
    matcher = SequenceMatcher(None, reference_text_clean, output_text_clean)
    pronunciation_accuracy = matcher.ratio() * 100

    # Opcode offsets are only valid for the exact strings a matcher was built
    # from. Transliteration to Latin is not guaranteed to preserve string
    # length, so for Latin feedback the two texts are aligned again in Latin
    # script instead of reusing offsets computed on the Arabic-script strings.
    if wants_latin:
        display_reference = ug_arab_to_latn(reference_text_clean)
        display_output = ug_arab_to_latn(output_text_clean)
        feedback_matcher = SequenceMatcher(None, display_reference, display_output)
    else:
        display_reference = reference_text_clean
        feedback_matcher = matcher

    # One <span> per opcode: 'equal' stretches are green; 'replace', 'delete'
    # and 'insert' are red ('insert' has an empty reference slice).
    spans = []
    for opcode, i1, i2, _j1, _j2 in feedback_matcher.get_opcodes():
        color = "green" if opcode == 'equal' else "red"
        segment = display_reference[i1:i2]
        spans.append(f'<span style="color: {color}; font-size: 20px;">{segment}</span>')

    comparison_md = (
        "<h4>Pronunciation Feedback</h4>\n"  # Small header
        "<div style='margin-top: 10px;'>\n"  # Add some spacing
        + "".join(spans)
        + "</div>"
    )

    return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy