Spaces:

cpt-subtext
/

speech-to-text

Sleeping

speech-to-text / app.py

cptsubtext

only one run

9fb8da4 10 months ago

2.67 kB

	import os
	import tempfile

	import pysrt
	import requests
	import streamlit as st
	import webvtt
	from pydub import AudioSegment
	from stable_whisper import load_hf_whisper
	from stable_whisper import load_model

	# Page layout: title, usage notes and the input widgets. The widget values
	# below are module-level names read by the rest of the script.

	# TODO: validate the entered token against st.secrets["API_TOKEN"]; the
	# script currently only checks that a token was entered at all.

	st.title("Speech-to-Text")

	with st.expander("README"):
	    st.write(
	        "This little tool accepts an audio file. After choosing the model, "
	        "an SRT subtitle file will be generated. The content of the subtitle "
	        "file will be shown and the user can choose to download it. It can "
	        "be used as a subtitle file, e.g. via Import Subtitles in DaVinci "
	        "Resolve."
	    )

	# Upload audio file
	uploaded_file = st.file_uploader("Upload Audio File", type=["mp3", "wav", "mov"])

	# Free tier or API token option
	use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
	# Mask the token so it is not displayed in clear text on screen.
	api_token = st.text_input("API Token (Unlimited)", type="password")

	# Should we translate to english?
	translate = st.checkbox("Would you like a translation to english?")

	# Model selection
	model_size = st.selectbox("Model Size", ("tiny", "base", "small", "medium"))

	def transcribe_to_subtitle(audio_bytes, model_name):
	    """Transcribe audio with Whisper and offer the result as an SRT file.

	    Loads the requested model, transcribes the audio (translating to
	    English when the module-level ``translate`` checkbox is ticked), shows
	    the subtitle text on the page and adds a download button for the file.

	    Args:
	        audio_bytes: Audio content passed unchanged to ``model.transcribe``.
	        model_name: Model name passed to ``load_model``.

	    Returns:
	        ``None`` on success. If transcribing or writing the subtitle file
	        fails, ``{"error": message}``; the message is also shown on the page
	        so the failure is visible even when the caller ignores the result.
	    """
	    # Load model based on selection
	    model = load_model(model_name)

	    # TODO: the free-tier limit ("Max 2 minutes") is not enforced here yet.

	    # Only pass ``task`` when a translation was requested so the plain
	    # transcription call stays exactly as before.
	    transcribe_options = {"verbose": True}
	    if translate:
	        transcribe_options["task"] = "translate"

	    # A private temporary directory gives every run its own file, so
	    # concurrent sessions cannot overwrite each other's "audio.srt", and the
	    # file is removed even if reading it back fails.
	    with tempfile.TemporaryDirectory() as work_dir:
	        srt_path = os.path.join(work_dir, "audio.srt")

	        try:
	            result = model.transcribe(audio_bytes, **transcribe_options)
	            result.to_srt_vtt(srt_path)
	        except Exception as e:
	            message = f"Error during transcription: {str(e)}"
	            st.error(message)
	            return {"error": message}

	        captions = pysrt.open(srt_path)
	        # Read the bytes now; the file disappears with the directory.
	        with open(srt_path, "rb") as f:
	            srt_bytes = f.read()

	    # Echo each caption to stdout (start, text, end, blank line).
	    for caption in captions:
	        print(caption.start, caption.text, caption.end, "", sep="\n")

	    # NOTE(review): unsafe_allow_html renders any markup contained in the
	    # subtitle text - presumably kept for inline tags written by the
	    # exporter; confirm it is still required.
	    st.markdown(captions.text, unsafe_allow_html=True)

	    # Download option
	    st.success("Transcription successful! Download subtitle file?")
	    st.download_button("Download Subtitle in SRT Format", srt_bytes, "audio.srt")

	# Script entry point: nothing happens until the user has uploaded a file.
	if uploaded_file is not None:
	    audio_bytes = uploaded_file.read()
	    # Access is granted by ticking the free tier or by entering a token.
	    has_access = use_free_tier or api_token
	    if has_access:
	        transcribe_to_subtitle(audio_bytes, model_size)
	    else:
	        st.error("API token required for non-free tier usage")