# Spaces: Sleeping
import os
import tempfile

import pysrt
import requests
import streamlit as st
import webvtt
from pydub import AudioSegment
from stable_whisper import load_hf_whisper
from stable_whisper import load_model
# Variables
# NOTE: looking the token up in st.secrets is currently disabled.
# valid_api_token = st.secrets["API_TOKEN"]

st.title("Speech-to-Text")

with st.expander("README"):
    st.write(
        "This little tool accepts and audiofile. After choosing the model a "
        "WebVTT file will be generated. The content of the WebVTT will be shown "
        "and a user can choose to download it. This can be used as Subtitle "
        "file e.g. in Davinci Resolve Import Subtitles"
    )

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio File", type=["mp3", "wav", "mov"])

# Free tier or API token option
use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
# The token is a secret: mask it so it is not echoed in clear text on screen.
api_token = st.text_input("API Token (Unlimited)", type="password")

# Should we translate to english?
translate = st.checkbox("Would you like a translation to english?")

# Model selection
model_size = st.selectbox("Model Size", ("tiny", "base", "small", "medium"))
def transcribe_to_subtitle(audio_bytes, model_name):
    """Transcribe audio to a subtitle file and render it in the Streamlit page.

    Loads the model named ``model_name``, transcribes ``audio_bytes`` (or
    translates it when the module-level ``translate`` checkbox is ticked),
    shows the caption text and offers the subtitle file for download.

    Args:
        audio_bytes: Raw content of the uploaded audio file.
        model_name: Model size identifier passed to ``load_model``.

    Returns:
        ``None`` on success. If transcription fails, the error is shown in the
        page and a dict ``{"error": <message>}`` is returned.
    """
    # Load model based on selection
    model = load_model(model_name)

    # NOTE: the free-tier length limit is not enforced here; the audio is
    # transcribed regardless of its duration.

    # Only pass ``task`` when a translation was requested, so the plain
    # transcription call stays exactly as before.
    transcribe_options = {"verbose": True}
    if translate:
        transcribe_options["task"] = "translate"

    # Work in a per-call temporary directory: a fixed file name in the working
    # directory would be shared (and deleted) across concurrent sessions, and
    # the directory is removed even if a later step raises.
    with tempfile.TemporaryDirectory() as work_dir:
        srt_path = os.path.join(work_dir, "audio.srt")

        # Transcribe audio
        try:
            result = model.transcribe(audio_bytes, **transcribe_options)
            result.to_srt_vtt(srt_path)
        except Exception as e:
            message = f"Error during transcription: {str(e)}"
            # The caller ignores the return value, so surface the failure here.
            st.error(message)
            return {"error": message}

        captions = pysrt.open(srt_path)
        # Read the file now; the temporary directory is gone after this block.
        with open(srt_path, "rb") as f:
            subtitle_bytes = f.read()

    # Log every caption to the server console.
    for caption in captions:
        print(caption.start)
        print(caption.text)
        print(caption.end)
        print()

    output = captions.text
    # NOTE(review): rendered with unsafe_allow_html=True, presumably so markup
    # inside the subtitle text is displayed — confirm this is intended.
    st.markdown(output, unsafe_allow_html=True)

    # Download option
    st.success("Transcription successful! Download subtitle file?")
    # NOTE(review): the label says WebVtt while the file is named .srt and is
    # parsed with pysrt — confirm which format is intended.
    st.download_button("Download Subtitle in WebVtt Format", subtitle_bytes, "audio.srt")
# Start the pipeline as soon as a file has been uploaded.
if uploaded_file is not None:
    audio_bytes = uploaded_file.read()
    # Access is granted on the free tier, or when a token was entered.
    access_granted = use_free_tier or bool(api_token)
    if access_granted:
        transcribe_to_subtitle(audio_bytes, model_size)
    else:
        st.error("API token required for non-free tier usage")