"""Gradio app that transcribes uploaded or recorded audio with Moonshine-tiny."""

import gradio as gr
import torch
import torchaudio
from transformers import AutoModelForSpeechSeq2Seq, PreTrainedTokenizerFast

# Model identifier and pinned revision, used for both the model and tokenizer.
MODEL_ID = 'usefulsensors/moonshine-tiny'
# Sample rate the Moonshine model expects its input waveform to be at.
TARGET_SAMPLE_RATE = 16000

# Load model and tokenizer globally with pinned revision.
# trust_remote_code is required: this repo ships custom model code whose
# forward() returns generated token ids directly (no explicit .generate()).
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    MODEL_ID, revision="main", trust_remote_code=True
)
model.eval()  # inference only — disable dropout / training-mode behavior
tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_ID, revision="main")


def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` and return the text.

    Args:
        audio_path: Filesystem path to an audio file (as supplied by a
            Gradio ``Audio`` component with ``type="filepath"``), or None.

    Returns:
        The decoded transcription string, or a user-facing message on
        missing input / failure. Never raises — errors are returned as
        strings so the Gradio UI always has something to display.
    """
    if audio_path is None:
        return "Please provide an audio input."
    try:
        # torchaudio.load returns a (channels, samples) tensor plus the
        # file's native sample rate.
        audio, sr = torchaudio.load(audio_path)

        # Downmix multi-channel (e.g. stereo) recordings to mono — the
        # model expects a single channel.
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)

        # Resample to the model's expected rate if needed.
        if sr != TARGET_SAMPLE_RATE:
            audio = torchaudio.functional.resample(audio, sr, TARGET_SAMPLE_RATE)

        # The custom Moonshine forward() returns generated token ids;
        # inference_mode avoids building an autograd graph.
        with torch.inference_mode():
            tokens = model(audio)
        return tokenizer.decode(tokens[0], skip_special_tokens=True)
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"Error processing audio: {str(e)}"


# Create Gradio interface: two tabs (file upload / microphone), each wired
# to the same transcription function.
with gr.Blocks() as demo:
    gr.Markdown("## Audio Transcription App")

    with gr.Tabs():
        with gr.TabItem("Upload Audio"):
            audio_file = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="Upload Audio File"
            )
            output_text1 = gr.Textbox(
                label="Transcription",
                placeholder="Transcription will appear here..."
            )
            upload_button = gr.Button("Transcribe Uploaded Audio")
            upload_button.click(
                fn=transcribe_audio,
                inputs=audio_file,
                outputs=output_text1
            )

        with gr.TabItem("Record Audio"):
            audio_mic = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record Audio"
            )
            output_text2 = gr.Textbox(
                label="Transcription",
                placeholder="Transcription will appear here..."
            )
            record_button = gr.Button("Transcribe Recorded Audio")
            record_button.click(
                fn=transcribe_audio,
                inputs=audio_mic,
                outputs=output_text2
            )

    gr.Markdown("""
    ### Instructions:
    1. Choose either 'Upload Audio' or 'Record Audio' tab
    2. Upload an audio file or record using your microphone
    3. Click the respective 'Transcribe' button
    4. Wait for the transcription to appear
    """)

if __name__ == "__main__":
    demo.launch()