# Commit 60da65a (verified) by rockerritesh: "for nepali-medium model"
import streamlit as st
import whisper
import tempfile
import os
# Set page config: browser-tab title and icon for the app.
# The icon literal was mis-encoded (UTF-8 bytes decoded with a legacy code
# page); it is restored to the intended studio-microphone emoji.
st.set_page_config(
    page_title="Audio Transcription App",
    page_icon="🎙️",
)
# Initialize Whisper model
@st.cache_resource
def load_whisper_model(model_name: str = "medium"):
    """Load a Whisper model and let Streamlit cache it as a shared resource.

    Args:
        model_name: Name handed to ``whisper.load_model``. Defaults to
            ``"medium"``, which is what this app has always loaded, so
            existing zero-argument callers are unaffected.

    Returns:
        The object returned by ``whisper.load_model(model_name)``.

    Note:
        ``st.cache_resource`` keys its cache on the arguments, so every
        distinct ``model_name`` requested stays cached separately.
    """
    return whisper.load_model(model_name)
def _transcribe_audio(model, audio, language="nepali"):
    """Write *audio* to a temporary file, transcribe it, and return the text.

    Args:
        model: Object exposing ``transcribe(path, language=...)`` that returns
            a mapping with a ``"text"`` entry (the loaded Whisper model).
        audio: Uploaded or recorded audio; must provide ``getvalue()`` and
            may provide a ``name`` attribute carrying the original filename.
        language: Language passed through to ``model.transcribe``.

    Returns:
        The ``"text"`` entry of the transcription result.

    Raises:
        Whatever ``model.transcribe`` or the file operations raise; the
        temporary file is removed in every case.
    """
    # Keep the upload's real extension (mp3/m4a/...) instead of always
    # claiming ".wav"; fall back to ".wav" when no filename is available.
    filename = getattr(audio, "name", "") or ""
    suffix = os.path.splitext(filename)[1] or ".wav"

    # delete=False: the file is closed before the model reopens it by path,
    # so removal is done explicitly in the ``finally`` below.
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        with tmp_file:
            # getvalue() returns the whole buffer regardless of the current
            # read position of the stream.
            tmp_file.write(audio.getvalue())
        result = model.transcribe(tmp_file.name, language=language)
        return result["text"]
    finally:
        # Runs on success and on failure, so a failed transcription no
        # longer leaves the temporary file behind.
        os.unlink(tmp_file.name)


def main():
    """Render the transcription page and handle one Streamlit script run.

    Loads the cached Whisper model, offers an upload widget and a recording
    widget side by side, and transcribes the selected audio when the
    "Transcribe Audio" button is pressed. An uploaded file takes precedence
    over a recording when both are present. Errors from model loading or
    transcription are reported in the page instead of being raised.
    """
    st.title("🎙️ Audio Transcription App")
    st.write("Record or upload audio to get its transcription")

    # Initialize the Whisper model; without it nothing below can work.
    try:
        model = load_whisper_model()
        st.success("✅ Whisper model loaded successfully")
    except Exception as e:
        st.error(f"Error loading Whisper model: {str(e)}")
        return

    # Create two columns for upload and record options
    col1, col2 = st.columns(2)
    with col1:
        # File upload option
        audio_file = st.file_uploader("Upload Audio", type=['wav', 'mp3', 'm4a'])
    with col2:
        # Audio recording option
        audio_record = st.audio_input("Record Audio")

    if audio_file is not None or audio_record is not None:
        audio_to_process = audio_file if audio_file is not None else audio_record
        if st.button("Transcribe Audio"):
            with st.spinner("Transcribing..."):
                try:
                    text = _transcribe_audio(model, audio_to_process)
                except Exception as e:
                    st.error(f"Error during transcription: {str(e)}")
                else:
                    # Display results
                    st.success("Transcription Complete!")
                    st.write("### Transcription:")
                    st.write(text)

    # Add usage instructions
    with st.expander("ℹ️ How to use"):
        st.write(
            "1. Either upload an audio file or record audio using the microphone\n"
            "2. Click the 'Transcribe Audio' button\n"
            "3. Wait for the transcription to complete\n"
            "4. View the transcribed text below\n"
            "\n"
            "Supported file formats: WAV, MP3, M4A\n"
        )

    # Add info about the model. The text now matches what the code does:
    # the "medium" model is loaded and the language is fixed to Nepali.
    with st.expander("🤖 About the Model"):
        st.write(
            "This app uses OpenAI's Whisper model (medium version) for transcription.\n"
            "- Transcribes Nepali speech (the language is set to Nepali)\n"
            "- Optimized for efficiency and accuracy\n"
            "- Processing time depends on audio length\n"
        )
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()