Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#@markdown Language Application: WER, Fluency (number of pauses), WPM (words per minute)
|
2 |
+
import gradio as gr
|
3 |
+
import speech_recognition as sr
|
4 |
+
from Levenshtein import distance as lev_distance, ratio
|
5 |
+
import tempfile
|
6 |
+
import soundfile as sf
|
7 |
+
import librosa
|
8 |
+
|
9 |
+
def analyze_speech(file_info):
    """Transcribe a recording and measure its pauses, duration and word count.

    Args:
        file_info: ``(sample_rate, samples)`` pair; element 0 is used as the
            sample rate and element 1 as the sample data written to disk.
            # NOTE(review): this is the shape gr.Audio(type="numpy") is
            # documented to deliver — confirm against the Gradio version used.

    Returns:
        A tuple ``(text, num_pauses, duration, word_count)``:
        the transcript returned by ``recognize_google`` (empty string when the
        speech could not be understood), the number of silent gaps longer than
        0.5 s between speech segments, the clip duration in seconds, and the
        number of whitespace-separated words in the transcript.

    Raises:
        ValueError: If ``file_info`` is ``None`` (no audio supplied).
        speech_recognition.RequestError: If the recognition request fails.
    """
    if file_info is None:
        raise ValueError("No audio was provided.")
    sample_rate, samples = file_info

    recognizer = sr.Recognizer()
    with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as tmpfile:
        # Write with the recording's own sample rate. A hard-coded rate would
        # stretch or compress the clip, skewing duration/WPM and distorting
        # the audio handed to the recognizer.
        sf.write(tmpfile.name, data=samples, samplerate=int(sample_rate), format='WAV')

        # Reload at the native rate for duration and pause analysis.
        y, sr_lib = librosa.load(tmpfile.name, sr=None)
        duration = librosa.get_duration(y=y, sr=sr_lib)

        # librosa.effects.split yields the NON-silent intervals, so a pause is
        # the gap between the end of one interval and the start of the next.
        speech_intervals = librosa.effects.split(y, top_db=32)
        num_pauses = sum(
            1
            for (_, prev_end), (next_start, _) in zip(speech_intervals, speech_intervals[1:])
            if (next_start - prev_end) / sr_lib > 0.5
        )

        with sr.AudioFile(tmpfile.name) as source:
            audio_data = recognizer.record(source)

        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Unintelligible speech: report an empty transcript, not a crash.
            text = ""

    return text, num_pauses, duration, len(text.split())
|
30 |
+
|
31 |
+
def calculate_wer(reference, hypothesis):
    """Return the word error rate of ``hypothesis`` measured against ``reference``.

    Both strings are split on whitespace and the edit distance between the two
    word sequences is divided by the number of reference words. When the
    reference contains no words the result is ``float('inf')`` rather than a
    division by zero.
    """
    reference_tokens = reference.split()
    hypothesis_tokens = hypothesis.split()
    word_edits = lev_distance(reference_tokens, hypothesis_tokens)

    if not reference_tokens:
        # Nothing to normalise by.
        return float('inf')
    return word_edits / len(reference_tokens)
|
37 |
+
|
38 |
+
def pronunciation_correction(expected_text, file_info):
    """Score a recording against the text the speaker was expected to read.

    Args:
        expected_text: Reference text the user intended to say.
        file_info: Audio payload forwarded unchanged to ``analyze_speech``.

    Returns:
        A ``(feedback, description)`` pair of strings. ``feedback`` is a
        verdict chosen from the character-level similarity between the
        lower-cased reference and transcript; ``description`` lists WER, pause
        count and words per minute. When the request cannot be scored (missing
        text, missing audio, or a recognizer failure) ``feedback`` explains
        the problem and ``description`` is empty.
    """
    # Guard clauses: without them a missing recording raises TypeError and an
    # empty reference produces a meaningless "WER: inf" verdict.
    if not expected_text or not expected_text.strip():
        return "Please enter the text you are going to read.", ""
    if file_info is None:
        return "Please upload or record an audio file.", ""

    try:
        user_spoken_text, num_pauses, duration, total_words = analyze_speech(file_info)
    except sr.UnknownValueError:
        return "Could not understand the audio, please try again.", ""
    except sr.RequestError as err:
        return f"Speech recognition service is unavailable: {err}", ""

    expected = expected_text.lower()
    spoken = user_spoken_text.lower()

    wer = calculate_wer(expected, spoken)
    # Words per minute; a zero-length clip has no meaningful rate.
    wpm = total_words / (duration / 60) if duration > 0 else 0
    similarity = ratio(expected, spoken)

    if similarity >= 0.9:
        feedback = "Excellent pronunciation!"
    elif similarity >= 0.7:
        feedback = "Good pronunciation!"
    elif similarity >= 0.5:
        feedback = "Needs improvement."
    else:
        feedback = "Poor pronunciation, try to focus more on clarity."

    description = f"WER: {wer:.2f}, Fluency: {num_pauses} pauses, {wpm:.0f} WPM"

    return feedback, description
|
52 |
+
|
53 |
+
# Gradio front end: reference text and a recording go in, a verdict and the
# detailed metrics come out.
# NOTE(review): the nesting below was reconstructed from a paste that lost its
# indentation; all five components are assumed to sit in the single row — confirm.
with gr.Blocks() as app:
    with gr.Row():
        text_input = gr.Textbox(label="Enter or paste your text here")
        audio_input = gr.Audio(label="Upload Audio File", type="numpy")
        check_pronunciation_button = gr.Button("Check Pronunciation")
        pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
        pronunciation_details = gr.Textbox(label="Detailed Metrics")

    # Wire the button to the scoring handler.
    check_pronunciation_button.click(
        fn=pronunciation_correction,
        inputs=[text_input, audio_input],
        outputs=[pronunciation_feedback, pronunciation_details],
    )

app.launch(debug=True)
|