# speech-to-text / app.py
# cptsubtext
# only one run
# 9fb8da4
import os
import tempfile

import pysrt
import requests
import streamlit as st
import webvtt
from pydub import AudioSegment
from stable_whisper import load_model
from stable_whisper import load_hf_whisper
# Variables
#valid_api_token = st.secrets["API_TOKEN"]
# --- Page header and README -------------------------------------------------
st.title("Speech-to-Text")

_README_TEXT = (
    "This little tool accepts and audiofile. "
    "After choosing the model a WebVTT file will be generated. "
    "The content of the WebVTT will be shown and a user can choose to download it. "
    "This can be used as Subtitle file e.g. in Davinci Resolve Import Subtitles"
)
with st.expander("README"):
    st.write(_README_TEXT)

# --- Input widgets (declared in the order they appear on the page) ----------
# Audio upload; only these file extensions are accepted by the uploader.
_ACCEPTED_EXTENSIONS = ["mp3", "wav", "mov"]
uploaded_file = st.file_uploader("Upload Audio File", type=_ACCEPTED_EXTENSIONS)

# Either the free tier is ticked or an API token is entered.
use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
api_token = st.text_input("API Token (Unlimited)")

# When ticked, the transcription task is switched to an English translation.
translate = st.checkbox("Would you like a translation to english?")

# Model name handed to the transcription function.
_MODEL_SIZES = ("tiny", "base", "small", "medium")
model_size = st.selectbox("Model Size", _MODEL_SIZES)
def transcribe_to_subtitle(audio_bytes, model_name):
    """Transcribe audio to an SRT subtitle file and render it in the page.

    Loads the stable-whisper model ``model_name``, transcribes
    ``audio_bytes`` (with ``task='translate'`` when the module-level
    ``translate`` checkbox is ticked), shows the caption text and offers
    the generated subtitle file for download.

    Args:
        audio_bytes: Raw content of the uploaded audio file.
        model_name: Model name passed to ``load_model`` (e.g. ``"tiny"``).

    Returns:
        ``None`` on success, or ``{"error": message}`` when transcription
        fails. The message is also displayed with ``st.error`` so the
        failure is visible even if the caller ignores the return value.
    """
    model = load_model(model_name)

    # TODO: the free-tier length limit (max 2 minutes) is not enforced yet.

    # Only pass ``task`` when translating so the library default is used
    # for plain transcription, exactly as before.
    options = {"verbose": True}
    if translate:
        options["task"] = "translate"

    # Work in a private temporary directory: a fixed "audio.srt" in the
    # working directory would be shared between concurrent sessions and
    # would be left behind if anything failed before the cleanup step.
    with tempfile.TemporaryDirectory() as workdir:
        srt_path = os.path.join(workdir, "audio.srt")
        try:
            result = model.transcribe(audio_bytes, **options)
            result.to_srt_vtt(srt_path)
        except Exception as e:
            message = f"Error during transcription: {str(e)}"
            st.error(message)
            return {"error": message}

        captions = pysrt.open(srt_path)
        # Read the file content now; the directory is removed on exit.
        with open(srt_path, "rb") as f:
            srt_data = f.read()

    # Debug output of every caption on the server console.
    for caption in captions:
        print(caption.start)
        print(caption.text)
        print(caption.end)
        print()

    output = captions.text
    st.markdown(output, unsafe_allow_html=True)

    # Download option
    st.success("Transcription successful! Download subtitle file?")
    # NOTE(review): the label mentions WebVTT but the file offered is .srt;
    # confirm which format is intended before changing either.
    st.download_button("Download Subtitle in WebVtt Format", srt_data, "audio.srt")
# Kick off the transcription as soon as a file has been uploaded.
if uploaded_file is not None:
    audio_bytes = uploaded_file.read()

    # A token is mandatory only when the free tier is not selected.
    may_proceed = bool(use_free_tier or api_token)
    if may_proceed:
        transcribe_to_subtitle(audio_bytes, model_size)
    else:
        st.error("API token required for non-free tier usage")