|
|
|
|
|
import os
import tempfile

import gradio as gr
import numpy as np
import pandas as pd
import soundfile as sf
import speech_recognition as sr
from Levenshtein import ratio
|
|
|
|
|
# Practice sentences offered in the "Select a Sentence" dropdown.
data = {
    "Sentences": [
        "A stitch in time saves nine.",
        "To be or not to be, that is the question.",
        "Five cats were living in safe caves.",
        "Hives give shelter to bees in large caves.",
        "His decision to plant a rose was amazing.",
        "She sells sea shells by the sea shore.",
        "The colorful parrot likes rolling berries.",
        "Time flies like an arrow; fruit flies like a banana.",
        "Good things come to those who wait.",
        "All human beings are born free and equal in dignity and rights.",
    ],
}

# Single-column table; the UI reads the "Sentences" column as a list.
df = pd.DataFrame(data)
|
|
|
def transcribe_audio(file_info):
    """Transcribe an audio payload to text with ``Recognizer.recognize_google``.

    Args:
        file_info: The value delivered by a ``gr.Audio(type="numpy")``
            component: a ``(sample_rate, samples)`` pair, or ``None`` when no
            audio was supplied.

    Returns:
        The recognised text on success. Failures are reported in-band as a
        message string rather than raised:

        * ``"No audio provided."`` when ``file_info`` is ``None``;
        * ``"Could not understand audio"`` when the recogniser cannot make
          sense of the audio;
        * ``"Could not request results; <reason>"`` when the recognition
          request itself fails.
    """
    if file_info is None:
        return "No audio provided."

    # Use the payload's own sample rate; writing with a fixed 44100 Hz would
    # change the speed/pitch of audio captured at any other rate.
    sample_rate, samples = file_info
    recognizer = sr.Recognizer()

    # Write into a private temporary directory instead of reopening a
    # still-open NamedTemporaryFile by name, which is not possible on Windows.
    with tempfile.TemporaryDirectory() as tmpdir:
        wav_path = os.path.join(tmpdir, "recording.wav")
        sf.write(wav_path, data=samples, samplerate=sample_rate, format="WAV")
        with sr.AudioFile(wav_path) as source:
            # record() loads the whole clip, so the file can go afterwards.
            audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"
|
|
|
def _normalize_for_comparison(text):
    """Lower-case *text*, turn punctuation into spaces and collapse whitespace."""
    spaced = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return " ".join(spaced.split())


def pronunciation_correction(expected_text, file_info):
    """Grade how closely a spoken recording matches ``expected_text``.

    The recording is transcribed with ``transcribe_audio`` and the transcript
    is compared to the expected sentence with ``ratio``. Both strings are
    normalised first (case, punctuation, whitespace) so that punctuation in
    the reference sentence does not lower the score.

    Args:
        expected_text: The sentence the user was asked to read, or ``None`` /
            empty when nothing has been selected.
        file_info: The audio payload to pass to ``transcribe_audio``, or
            ``None`` when no audio was supplied.

    Returns:
        A ``(feedback, description)`` pair of strings. ``description`` is the
        similarity formatted to two decimals. When the input is missing or
        transcription failed, ``feedback`` carries the explanation and
        ``description`` is ``"0.00"``.
    """
    if not expected_text:
        return "Please select a sentence first.", "0.00"
    if file_info is None:
        return "Please record or upload audio first.", "0.00"

    user_spoken_text = transcribe_audio(file_info)

    # transcribe_audio reports failures as message strings; surface them
    # instead of grading the error message as if the user had said it.
    error_prefixes = (
        "No audio provided",
        "Could not understand audio",
        "Could not request results;",
    )
    if user_spoken_text.startswith(error_prefixes):
        return user_spoken_text, "0.00"

    similarity = ratio(
        _normalize_for_comparison(expected_text),
        _normalize_for_comparison(user_spoken_text),
    )
    description = f"{similarity:.2f}"

    if similarity >= 0.9:
        feedback = "Excellent pronunciation!"
    elif similarity >= 0.7:
        feedback = "Good pronunciation!"
    elif similarity >= 0.5:
        feedback = "Needs improvement."
    else:
        feedback = "Poor pronunciation, try to focus more on clarity."

    return feedback, description
|
|
|
# Build the UI: pick a sentence, supply audio, then request a score.
with gr.Blocks() as app:
    with gr.Row():
        sentence_dropdown = gr.Dropdown(
            choices=df['Sentences'].tolist(),
            label="Select a Sentence",
        )
        selected_sentence_output = gr.Textbox(
            label="Selected Text",
            interactive=False,
        )
        audio_input = gr.Audio(label="Upload Audio File", type="numpy")
        check_pronunciation_button = gr.Button("Check Pronunciation")
        pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
        pronunciation_score = gr.Number(
            label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)",
        )

    # Echo the chosen sentence into the read-only textbox.
    sentence_dropdown.change(
        lambda x: x,
        inputs=sentence_dropdown,
        outputs=selected_sentence_output,
    )

    # Grade the audio against the selected sentence on button press.
    check_pronunciation_button.click(
        pronunciation_correction,
        inputs=[sentence_dropdown, audio_input],
        outputs=[pronunciation_feedback, pronunciation_score],
    )

app.launch(debug=True)