Spaces:

rockerritesh
/

nepali-speech-to-text

Sleeping

App Files Files Community

nepali-speech-to-text / app.py

rockerritesh

for nepali-medium model

60da65a verified about 2 months ago

raw

history blame contribute delete

2.89 kB

	import streamlit as st
	import whisper
	import tempfile
	import os

	# Page-level settings: the title and the icon for this app.
	st.set_page_config(page_title="Audio Transcription App", page_icon="🎙️")

	# Initialize Whisper model
	@st.cache_resource
	def load_whisper_model(model_name: str = "medium"):
	    """Load a Whisper model through Streamlit's resource cache.

	    Args:
	        model_name: Name handed to ``whisper.load_model``. Defaults to
	            ``"medium"``, so existing zero-argument callers are unaffected.

	    Returns:
	        The object returned by ``whisper.load_model(model_name)``.
	    """
	    return whisper.load_model(model_name)

	def _transcribe_audio(model, audio_source):
	    """Transcribe an uploaded or recorded audio object and return its text.

	    The audio bytes are copied to a temporary file whose path is handed to
	    the model. The file is always removed afterwards, including when writing
	    or transcription raises.

	    Args:
	        model: Object exposing ``transcribe(path, language=...)`` whose
	            result supports ``result["text"]``.
	        audio_source: Object with a ``read()`` method returning the audio
	            bytes. Its ``name`` attribute, when present, supplies the
	            temporary file's extension.

	    Returns:
	        The ``"text"`` entry of the transcription result.
	    """
	    # Keep the source's own extension (.mp3, .m4a, ...) rather than labelling
	    # every file as .wav; fall back to .wav when no name is available.
	    source_name = getattr(audio_source, "name", "") or ""
	    extension = os.path.splitext(source_name)[1] or ".wav"

	    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
	    try:
	        # Close the handle before the path is passed on to the model.
	        with tmp_file:
	            tmp_file.write(audio_source.read())
	        result = model.transcribe(tmp_file.name, language="nepali")
	        return result["text"]
	    finally:
	        # delete=False means nothing else removes the file, so clean up on
	        # both the success and the failure path.
	        os.unlink(tmp_file.name)


	def main():
	    """Render the page: load the model, collect audio, show the transcription.

	    The user can upload a file or record audio; an uploaded file takes
	    precedence when both are present. Errors from model loading or
	    transcription are reported on the page instead of being raised.
	    """
	    st.title("🎙️ Audio Transcription App")
	    st.write("Record or upload audio to get its transcription")

	    # Initialize the Whisper model; without it the rest of the page is useless.
	    try:
	        model = load_whisper_model()
	    except Exception as e:
	        st.error(f"Error loading Whisper model: {e}")
	        return
	    else:
	        st.success("✅ Whisper model loaded successfully")

	    # Two columns: upload on the left, microphone recording on the right.
	    col1, col2 = st.columns(2)

	    with col1:
	        audio_file = st.file_uploader("Upload Audio", type=['wav', 'mp3', 'm4a'])

	    with col2:
	        audio_record = st.audio_input("Record Audio")

	    # Prefer the uploaded file; otherwise use the recording (may be None).
	    audio_to_process = audio_file if audio_file is not None else audio_record

	    # The button is only rendered once some audio is available.
	    if audio_to_process is not None and st.button("Transcribe Audio"):
	        with st.spinner("Transcribing..."):
	            try:
	                transcription = _transcribe_audio(model, audio_to_process)
	            except Exception as e:
	                st.error(f"Error during transcription: {e}")
	            else:
	                st.success("Transcription Complete!")
	                st.write("### Transcription:")
	                st.write(transcription)

	    # Add usage instructions
	    with st.expander("ℹ️ How to use"):
	        st.write("""
	        1. Either upload an audio file or record audio using the microphone
	        2. Click the 'Transcribe Audio' button
	        3. Wait for the transcription to complete
	        4. View the transcribed text below

	        Supported file formats: WAV, MP3, M4A
	        """)

	    # Add info about the model (kept in line with what the code actually does:
	    # the "medium" model is loaded and transcription is forced to Nepali).
	    with st.expander("🤖 About the Model"):
	        st.write("""
	        This app uses OpenAI's Whisper model (medium version) for transcription.
	        - Transcribes speech as Nepali
	        - Optimized for efficiency and accuracy
	        - Processing time depends on audio length
	        """)

	# Script entry point: build the page only when this file is run directly.
	if __name__ == "__main__":
	    main()