Spaces:
Sleeping
Sleeping
import streamlit as st | |
import whisper | |
import tempfile | |
import os | |
# Set page config. The icon is written as a real emoji: the previous value was
# mis-decoded text (UTF-8 bytes read through a Greek code page), which is not a
# valid emoji for the browser tab icon.
st.set_page_config(
    page_title="Audio Transcription App",
    page_icon="🎙️",
)
# Initialize Whisper model
@st.cache_resource
def load_whisper_model():
    """Load the Whisper "medium" checkpoint and return the model object.

    The function is wrapped in ``st.cache_resource`` so the model is loaded
    only once per server process. Streamlit re-executes the script on every
    widget interaction; without the cache each click would reload the model.

    Returns:
        The object returned by ``whisper.load_model("medium")``.

    Raises:
        Whatever ``whisper.load_model`` raises (for example when the
        checkpoint cannot be downloaded); the caller reports it in the UI.
    """
    return whisper.load_model("medium")
def _transcribe_audio(model, audio, language="nepali"):
    """Write *audio* to a temporary file, transcribe it and return the text.

    Args:
        model: Object exposing ``transcribe(path, language=...)`` that returns
            a mapping with a ``"text"`` entry (the loaded Whisper model).
        audio: File-like object with ``read()``; an optional ``name``
            attribute supplies the file extension.
        language: Language passed through to ``model.transcribe``.

    Returns:
        The transcribed text (``result["text"]``).
    """
    # Keep the source's own extension (.mp3, .m4a, ...) instead of always
    # labelling the temp file ".wav"; fall back to ".wav" when no name exists.
    suffix = os.path.splitext(getattr(audio, "name", "") or "")[1] or ".wav"
    tmp_path = None
    try:
        # delete=False: the file is closed before transcription and is
        # re-opened by path, so it must survive the ``with`` block.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(audio.read())
        result = model.transcribe(tmp_path, language=language)
        return result["text"]
    finally:
        # Always remove the temp file, including when transcription fails.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)


def main():
    """Render the app: load the model, collect audio, show the transcription.

    The user can upload a file or record audio; when both are present the
    uploaded file takes precedence. Errors while loading the model or while
    transcribing are shown in the page instead of propagating.
    """
    st.title("🎙️ Audio Transcription App")
    st.write("Record or upload audio to get its transcription")

    # Initialize the Whisper model
    try:
        model = load_whisper_model()
    except Exception as e:
        st.error(f"Error loading Whisper model: {e}")
        return
    st.success("✅ Whisper model loaded successfully")

    # Create two columns for upload and record options
    col1, col2 = st.columns(2)
    with col1:
        # File upload option
        audio_file = st.file_uploader("Upload Audio", type=['wav', 'mp3', 'm4a'])
    with col2:
        # Audio recording option
        audio_record = st.audio_input("Record Audio")

    # An uploaded file wins over a recording when both are provided.
    audio_to_process = audio_file if audio_file is not None else audio_record

    if audio_to_process is not None and st.button("Transcribe Audio"):
        with st.spinner("Transcribing..."):
            try:
                text = _transcribe_audio(model, audio_to_process)
            except Exception as e:
                # UI boundary: report the failure to the user rather than crash.
                st.error(f"Error during transcription: {e}")
            else:
                # Display results
                st.success("Transcription Complete!")
                st.write("### Transcription:")
                st.write(text)

    # Add usage instructions
    with st.expander("ℹ️ How to use"):
        st.write("""
        1. Either upload an audio file or record audio using the microphone
        2. Click the 'Transcribe Audio' button
        3. Wait for the transcription to complete
        4. View the transcribed text below

        Supported file formats: WAV, MP3, M4A
        """)

    # Add info about the model
    with st.expander("🤖 About the Model"):
        # Text kept in sync with the code: the "medium" checkpoint is loaded
        # and transcription is requested with language="nepali".
        st.write("""
        This app uses OpenAI's Whisper model (medium version) for transcription.
        - Transcription language is set to Nepali
        - Optimized for efficiency and accuracy
        - Processing time depends on audio length
        """)
# Script entry point: build the page only when this file is executed directly
# (as opposed to being imported as a module).
if __name__ == "__main__":
    main()