import gradio as gr
import torch
import torchaudio
from transformers import AutoModelForSpeechSeq2Seq, PreTrainedTokenizerFast
# Load the model and tokenizer once at startup. Note: revision="main"
# tracks the branch head rather than pinning a specific commit; use a
# commit hash here for fully reproducible builds.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    "usefulsensors/moonshine-tiny",
    revision="main",
    trust_remote_code=True,
)
tokenizer = PreTrainedTokenizerFast.from_pretrained(
    "usefulsensors/moonshine-tiny",
    revision="main",
)
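# Inference-only app, so disable dropout and other training-time behaviour.
# A standard PyTorch precaution added here; not part of the original snippet.
model.eval()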
def transcribe_audio(audio_path):
    if audio_path is None:
        return "Please provide an audio input."
    try:
        # Load the audio and resample to the 16 kHz rate Moonshine expects.
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            audio = torchaudio.functional.resample(audio, sr, 16000)
        # Guard against stereo uploads: downmix to a single channel.
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)
        # The Moonshine remote code returns generated token ids directly
        # from the forward call, so no separate generate() step is needed.
        with torch.no_grad():
            tokens = model(audio)
        return tokenizer.decode(tokens[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error processing audio: {e}"
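# Quick sanity check outside the UI (the path below is hypothetical, for
# illustration only; it is not part of the original app):
#     print(transcribe_audio("sample.wav"))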
# Build the Gradio interface: one tab for file uploads, one for microphone
# recordings, each wired to the same transcription function.
with gr.Blocks() as demo:
    gr.Markdown("## Audio Transcription App")
    with gr.Tabs():
        with gr.TabItem("Upload Audio"):
            audio_file = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="Upload Audio File",
            )
            output_text1 = gr.Textbox(
                label="Transcription",
                placeholder="Transcription will appear here...",
            )
            upload_button = gr.Button("Transcribe Uploaded Audio")
            upload_button.click(
                fn=transcribe_audio,
                inputs=audio_file,
                outputs=output_text1,
            )
        with gr.TabItem("Record Audio"):
            audio_mic = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record Audio",
            )
            output_text2 = gr.Textbox(
                label="Transcription",
                placeholder="Transcription will appear here...",
            )
            record_button = gr.Button("Transcribe Recorded Audio")
            record_button.click(
                fn=transcribe_audio,
                inputs=audio_mic,
                outputs=output_text2,
            )
    gr.Markdown("""
    ### Instructions
    1. Choose either the 'Upload Audio' or the 'Record Audio' tab.
    2. Upload an audio file or record with your microphone.
    3. Click the corresponding 'Transcribe' button.
    4. Wait for the transcription to appear.
    """)
if __name__ == "__main__":
    demo.launch()
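    # For a temporary public URL when running outside a Space, Gradio's
    # launch() also accepts share=True (not used in the original app):
    #     demo.launch(share=True)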