DexterSptizu's picture
Create app.py
5954031 verified
raw
history blame
1.48 kB
import gradio as gr
import torchaudio
from transformers import AutoModelForSpeechSeq2Seq, PreTrainedTokenizerFast
def transcribe_audio(audio_path):
    """Transcribe an audio file to text using the module-level model.

    Args:
        audio_path: Filesystem path of the audio to transcribe, as delivered
            by a ``gr.Audio(type="filepath")`` component. May be ``None`` (or
            empty) when the user presses the button without providing audio.

    Returns:
        The decoded transcription string, or ``""`` when no audio was given.
    """
    # Gradio passes None when nothing was uploaded/recorded; return early
    # instead of letting torchaudio fail on an invalid path.
    if not audio_path:
        return ""

    target_rate = 16000  # sample rate the audio is normalized to before inference

    # torchaudio.load returns a (channels, frames) tensor plus its sample rate.
    waveform, sample_rate = torchaudio.load(audio_path)

    # Downmix multi-channel recordings to mono. Only tokens[0] is decoded
    # below, so without this a stereo file would (presumably) be treated as a
    # batch of two clips and everything but the first channel dropped.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, target_rate
        )

    # Get transcription
    tokens = model(waveform)
    return tokenizer.decode(tokens[0], skip_special_tokens=True)
# Load the model and tokenizer once at import time so that every
# transcription request reuses the same instances.
# NOTE(review): trust_remote_code=True runs Python code shipped inside the
# model repository -- confirm the repository is trusted (and consider pinning
# a revision).
_MODEL_ID = 'usefulsensors/moonshine-tiny'
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    _MODEL_ID,
    trust_remote_code=True,
)
tokenizer = PreTrainedTokenizerFast.from_pretrained(_MODEL_ID)
# Create Gradio interface: two tabs (file upload / microphone recording),
# each with its own audio input, transcription textbox and trigger button.
# Both buttons call transcribe_audio with the audio component's file path.
with gr.Blocks() as demo:
    gr.Markdown("## Audio Transcription App")
    with gr.Tabs():
        with gr.TabItem("Upload Audio"):
            # Gradio 4+ takes a list via `sources`; the older singular
            # `source=` keyword is no longer accepted.
            audio_file = gr.Audio(sources=["upload"], type="filepath")
            output_text1 = gr.Textbox(label="Transcription")
            upload_button = gr.Button("Transcribe")
            upload_button.click(
                fn=transcribe_audio,
                inputs=audio_file,
                outputs=output_text1,
            )
        with gr.TabItem("Record Audio"):
            audio_mic = gr.Audio(sources=["microphone"], type="filepath")
            output_text2 = gr.Textbox(label="Transcription")
            record_button = gr.Button("Transcribe")
            record_button.click(
                fn=transcribe_audio,
                inputs=audio_mic,
                outputs=output_text2,
            )

# Only start the server when run as a script, so importing this module
# (e.g. for reuse or tooling) does not block on launching the app.
if __name__ == "__main__":
    demo.launch()