# NOTE: status banner captured from the hosting page, not part of the program:
# "Spaces: Runtime error"
#@markdown Language application metrics: WER (word error rate), fluency (number of pauses), WPM (words per minute)
import os
import tempfile

import gradio as gr
import librosa
import soundfile as sf
import speech_recognition as sr
from Levenshtein import distance as lev_distance, ratio
def _count_pauses(speech_intervals, sample_rate, min_pause_seconds):
    """Count the silent gaps between consecutive speech intervals.

    ``speech_intervals`` is an ordered sequence of ``(start, end)`` sample
    indices marking the non-silent stretches. A gap between the end of one
    stretch and the start of the next counts as a pause when it lasts longer
    than ``min_pause_seconds``.
    """
    intervals = [(int(start), int(end)) for start, end in speech_intervals]
    return sum(
        1
        for (_, prev_end), (next_start, _) in zip(intervals, intervals[1:])
        if (next_start - prev_end) / sample_rate > min_pause_seconds
    )


def analyze_speech(file_info, *, top_db=32, min_pause_seconds=0.5):
    """Transcribe a recording and measure its pauses and duration.

    Args:
        file_info: ``(sample_rate, samples)`` pair describing the recording.
        top_db: Threshold (in dB below the peak) under which audio is treated
            as silence when locating speech intervals.
        min_pause_seconds: Minimum length of a silent gap between two speech
            intervals for it to count as a pause.

    Returns:
        A tuple ``(text, num_pauses, duration, word_count)`` where ``text`` is
        the recognised transcript, ``num_pauses`` the number of silent gaps
        between speech intervals, ``duration`` the clip length in seconds and
        ``word_count`` the number of whitespace-separated words in ``text``.

    Raises:
        ValueError: If ``file_info`` is ``None``.

    Errors raised by the recogniser itself (for example when the speech is
    unintelligible or the recognition service cannot be reached) are not
    caught here and propagate to the caller.
    """
    if file_info is None:
        raise ValueError(
            "No audio provided: expected a (sample_rate, samples) pair."
        )
    # Use the recording's own sample rate; assuming a fixed rate would skew
    # the duration (and therefore WPM) and distort the audio sent for
    # recognition.
    sample_rate = int(file_info[0])
    samples = file_info[1]

    recognizer = sr.Recognizer()
    # A temporary directory (rather than an open NamedTemporaryFile) lets the
    # WAV file be reopened by name on every platform.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "speech.wav")
        sf.write(wav_path, data=samples, samplerate=sample_rate, format="WAV")

        # Reload with the file's native sampling rate for the timing analysis.
        y, sr_lib = librosa.load(wav_path, sr=None)
        duration = librosa.get_duration(y=y, sr=sr_lib)

        # split() yields the NON-silent intervals, so pauses are the gaps
        # between them. Leading and trailing silence is not counted.
        speech_intervals = librosa.effects.split(y, top_db=top_db)
        num_pauses = _count_pauses(speech_intervals, sr_lib, min_pause_seconds)

        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

    text = recognizer.recognize_google(audio_data)
    return text, num_pauses, duration, len(text.split())
def calculate_wer(reference, hypothesis):
    """Return the word error rate of *hypothesis* against *reference*.

    Both strings are split on whitespace and compared word by word. The
    result is the minimum number of word insertions, deletions and
    substitutions needed to turn the reference into the hypothesis, divided
    by the number of reference words.

    Args:
        reference: The expected text.
        hypothesis: The text to score.

    Returns:
        The word error rate as a float, or ``float('inf')`` when the
        reference contains no words (the rate is undefined in that case).
    """
    ref_words = reference.split()
    hyp_words = hypothesis.split()
    if not ref_words:
        return float('inf')  # Avoid division by zero

    # Word-level edit distance computed explicitly, so the result does not
    # depend on the string-distance library accepting lists of words.
    # ``previous[j]`` is the distance between the reference words consumed so
    # far and the first ``j`` hypothesis words.
    previous = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, start=1):
        current = [i]
        for j, hyp_word in enumerate(hyp_words, start=1):
            substitution = previous[j - 1] + (ref_word != hyp_word)
            deletion = previous[j] + 1
            insertion = current[j - 1] + 1
            current.append(min(substitution, deletion, insertion))
        previous = current

    return previous[-1] / len(ref_words)
def _normalize_text(text):
    """Lowercase *text* and strip punctuation for a fair transcript comparison.

    Punctuation is replaced by spaces (apostrophes inside words are kept) and
    runs of whitespace are collapsed, so typed text such as "Hello, world."
    compares equal to a transcript such as "hello world".
    """
    lowered = text.lower().replace("\u2019", "'")
    cleaned = "".join(
        ch if (ch.isalnum() or ch.isspace() or ch == "'") else " "
        for ch in lowered
    )
    words = (word.strip("'") for word in cleaned.split())
    return " ".join(word for word in words if word)


def pronunciation_correction(expected_text, file_info):
    """Score a recording against the text the speaker was expected to read.

    Args:
        expected_text: The reference text entered by the user.
        file_info: The recording, passed through to ``analyze_speech``;
            ``None`` when no audio was supplied.

    Returns:
        A ``(feedback, description)`` pair of strings: a short verdict and a
        line with the detailed metrics (WER, number of pauses, words per
        minute). When the inputs are missing or recognition fails, the pair
        carries an explanatory message instead of metrics.
    """
    if file_info is None:
        return "No audio provided.", "Please upload or record an audio file first."

    # Compare on normalised text: punctuation in the typed reference would
    # otherwise count as word errors against the recogniser's transcript.
    reference = _normalize_text(expected_text or "")
    if not reference:
        return "No reference text provided.", "Please enter the text you are reading."

    try:
        user_spoken_text, num_pauses, duration, total_words = analyze_speech(file_info)
    except sr.UnknownValueError:
        return (
            "Could not understand the audio.",
            "No speech was recognised; please try a clearer recording.",
        )
    except sr.RequestError as exc:
        return "Speech recognition service unavailable.", f"Request failed: {exc}"

    hypothesis = _normalize_text(user_spoken_text)
    wer = calculate_wer(reference, hypothesis)
    wpm = total_words / (duration / 60) if duration > 0 else 0
    similarity = ratio(reference, hypothesis)

    # Thresholds are checked from best to worst; the first one met wins.
    feedback = "Poor pronunciation, try to focus more on clarity."
    for threshold, message in (
        (0.9, "Excellent pronunciation!"),
        (0.7, "Good pronunciation!"),
        (0.5, "Needs improvement."),
    ):
        if similarity >= threshold:
            feedback = message
            break

    description = f"WER: {wer:.2f}, Fluency: {num_pauses} pauses, {wpm:.0f} WPM"
    return feedback, description
# Gradio front end: a text box for the reference text, an audio input for the
# recording, and a button that sends both to pronunciation_correction and
# shows its two results.
# NOTE(review): the original indentation was lost, so which widgets sit inside
# the Row (and that launch() runs after the Blocks context) is assumed; confirm.
with gr.Blocks() as app:
    with gr.Row():
        text_input = gr.Textbox(label="Enter or paste your text here")
        audio_input = gr.Audio(label="Upload Audio File", type="numpy")

    check_pronunciation_button = gr.Button("Check Pronunciation")
    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
    pronunciation_details = gr.Textbox(label="Detailed Metrics")

    # Wire the button: inputs are passed positionally to the handler and its
    # two return values fill the two output boxes in order.
    check_pronunciation_button.click(
        fn=pronunciation_correction,
        inputs=[text_input, audio_input],
        outputs=[pronunciation_feedback, pronunciation_details],
    )

app.launch(debug=True)