# Hugging Face Space: rockerritesh/nepali-speech-to-text (status: Sleeping)
# File size: 2,886 Bytes

import streamlit as st
import whisper
import tempfile
import os

# Browser-tab title and icon for the app page.
st.set_page_config(page_title="Audio Transcription App", page_icon="🎙️")

# Initialize Whisper model
@st.cache_resource
def load_whisper_model(model_name: str = "medium"):
    """Load a Whisper model by name and return it.

    The function is wrapped in ``st.cache_resource``; per Streamlit's
    resource caching, the loaded model is meant to be reused for repeated
    calls with the same ``model_name`` instead of being loaded again.

    Args:
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"medium"``, the size this app has always used.

    Returns:
        Whatever ``whisper.load_model(model_name)`` returns.
    """
    return whisper.load_model(model_name)

def _transcribe_audio(model, audio, language="nepali"):
    """Transcribe an uploaded or recorded audio object and return its text.

    The audio bytes are written to a temporary file whose path is handed to
    ``model.transcribe``. The temporary file is removed in a ``finally``
    clause, so it is cleaned up even when transcription raises.

    Args:
        model: Object exposing ``transcribe(path, language=...)`` that returns
            a mapping with a ``"text"`` key (the loaded Whisper model here).
        audio: File-like object produced by the upload or record widget.
        language: Language passed through to ``model.transcribe``.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    # Keep the upload's own extension (e.g. .mp3, .m4a) so the temp file is
    # not mislabeled as WAV; fall back to .wav when no name is available.
    suffix = os.path.splitext(getattr(audio, "name", "") or "")[1] or ".wav"

    tmp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Record the path first so the file is still removed if the
            # write below fails.
            tmp_file_path = tmp_file.name
            # getvalue() returns the full buffer regardless of the stream's
            # current read position.
            tmp_file.write(audio.getvalue())

        result = model.transcribe(tmp_file_path, language=language)
        return result["text"]
    finally:
        if tmp_file_path is not None and os.path.exists(tmp_file_path):
            os.unlink(tmp_file_path)


def main():
    """Render the transcription page and run transcription on demand.

    Loads the Whisper model (stopping with an error message if that fails),
    shows an upload widget and a record widget side by side, and, when audio
    is available and the button is pressed, displays the Nepali transcription.
    Usage and model information are shown in expanders at the bottom.
    """
    st.title("🎙️ Audio Transcription App")
    st.write("Record or upload audio to get its transcription")

    # Initialize the Whisper model; nothing else on the page is useful
    # without it, so bail out after reporting the error.
    try:
        model = load_whisper_model()
    except Exception as e:
        st.error(f"Error loading Whisper model: {e}")
        return
    st.success("✅ Whisper model loaded successfully")

    # Create two columns for upload and record options
    col1, col2 = st.columns(2)

    with col1:
        # File upload option
        audio_file = st.file_uploader("Upload Audio", type=['wav', 'mp3', 'm4a'])

    with col2:
        # Audio recording option
        audio_record = st.audio_input("Record Audio")

    # An uploaded file takes precedence over a recording when both exist.
    audio_to_process = audio_file if audio_file is not None else audio_record

    # The button is only rendered once there is audio to work on.
    if audio_to_process is not None and st.button("Transcribe Audio"):
        with st.spinner("Transcribing..."):
            try:
                text = _transcribe_audio(model, audio_to_process)
            except Exception as e:
                # UI boundary: surface the failure to the user instead of
                # crashing the page.
                st.error(f"Error during transcription: {e}")
            else:
                # Display results
                st.success("Transcription Complete!")
                st.write("### Transcription:")
                st.write(text)

    # Add usage instructions
    with st.expander("ℹ️ How to use"):
        st.write("""
        1. Either upload an audio file or record audio using the microphone
        2. Click the 'Transcribe Audio' button
        3. Wait for the transcription to complete
        4. View the transcribed text below
        
        Supported file formats: WAV, MP3, M4A
        """)

    # Add info about the model (kept in line with the model size that is
    # loaded and the language that is forced during transcription)
    with st.expander("🤖 About the Model"):
        st.write("""
        This app uses OpenAI's Whisper model (medium version) for transcription.
        - Transcribes speech as Nepali
        - Optimized for efficiency and accuracy
        - Processing time depends on audio length
        """)

# Run the app only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()