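"""Gradio app that compresses a video/audio file to mp3 with pydub, sends it
to the OpenAI Whisper API for transcription, and returns an SRT subtitle file."""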
import gradio as gr
import openai
from typing import Iterator
import tempfile
from pydub import AudioSegment
def audio_from_file(filename: str) -> AudioSegment:
    try:
        audio = AudioSegment.from_file(filename)
    except FileNotFoundError:
        raise ValueError(
            f"Cannot load audio from file: `{filename}` not found. Did you forget to install `ffmpeg`?"
        )
    return audio
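
# Example usage (hypothetical file name; pydub needs ffmpeg on PATH to decode
# most container formats):
#     clip = audio_from_file("talk.mp4")
#     print(len(clip) / 1000, "seconds")   # AudioSegment length is in ms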
def format_timestamp(
    seconds: float, always_include_hours: bool = False, decimal_marker: str = "."
):
    assert seconds >= 0, "non-negative timestamp expected"
    milliseconds = round(seconds * 1000.0)

    hours = milliseconds // 3_600_000
    milliseconds -= hours * 3_600_000

    minutes = milliseconds // 60_000
    milliseconds -= minutes * 60_000

    seconds = milliseconds // 1_000
    milliseconds -= seconds * 1_000

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
def write_srt(transcript: Iterator[dict], file: str):
    """
    Write a transcript to `file` (a path) in SRT format.

    Example usage:
        from pathlib import Path
        result = transcribe(model, audio_path, temperature=temperature, **args)

        # save SRT
        audio_basename = Path(audio_path).stem
        write_srt(result["segments"], Path(output_dir) / (audio_basename + ".srt"))
    """
    with open(file, "w", encoding="utf-8") as f:
        for segment in transcript:
            # Each SRT block: numeric id, `start --> end` timestamps (SRT uses
            # a comma as the decimal marker), then the text, then a blank line.
            segment_id = segment["id"]
            start = format_timestamp(
                segment["start"], always_include_hours=True, decimal_marker=","
            )
            end = format_timestamp(
                segment["end"], always_include_hours=True, decimal_marker=","
            )
            text = segment["text"].strip().replace("-->", "->")
            f.write(f"{segment_id}\n{start} --> {end}\n{text}\n\n")
def create_main_tab():
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                api_text = gr.Textbox(label="OpenAI API Key")
                file_type = gr.Radio(
                    ["Video", "Audio"],
                    value="Video",
                    label="File Type",
                    interactive=True,
                )
                video = gr.Video()
                audio = gr.Audio(visible=False)
                with gr.Row():
                    compress_btn = gr.Button("Compress")
                    submit_btn = gr.Button("Submit")
            with gr.Column():
                compress_file = gr.File(label="Compressed file", interactive=False)
                subtitle_file = gr.File(label="Subtitle")
                message_text = gr.Textbox(label="Info")
        def handle_file_type_change(evt: gr.SelectData):
            if evt.index == 0:
                # Video selected: show the video input, hide the audio input
                return [gr.update(visible=True), gr.update(visible=False)]
            elif evt.index == 1:
                # Audio selected: hide the video input, show the audio input
                return [gr.update(visible=False), gr.update(visible=True)]

        file_type.select(
            handle_file_type_change,
            None,
            [video, audio],
        )
        def handle_compress_btn_submit(file_type, video, audio):
            if file_type == "Video":
                audio_data = audio_from_file(video)
            elif file_type == "Audio":
                audio_data = audio_from_file(audio)
            else:
                raise ValueError(f"Unknown file type: {file_type}")
            # Re-encode as 96 kbps mp3 to shrink the upload (the Whisper API
            # rejects files larger than 25 MB).
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
                audio_data.export(tmp_file.name, format="mp3", bitrate="96k")
                return tmp_file.name

        compress_btn.click(
            fn=handle_compress_btn_submit,
            inputs=[file_type, video, audio],
            outputs=[compress_file],
        )
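
        # Outside the UI, the same compression step can be exercised directly
        # (hypothetical file name):
        #     mp3_path = handle_compress_btn_submit("Audio", None, "speech.wav")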
        def handle_btn_submit(compress_file, api_text):
            def transcribe_audio(input_file, output_file):
                with open(input_file, "rb") as f:
                    try:
                        # verbose_json responses include the per-segment
                        # timestamps that write_srt needs.
                        result = openai.Audio.transcribe(
                            "whisper-1", f, response_format="verbose_json"
                        )
                        write_srt(result["segments"], output_file)
                        return f"Success! The subtitle file will be named: {output_file}"
                    except Exception as e:
                        return f"Error. OpenAI API unavailable. Received: {e}"

            # Submitting before compressing leaves compress_file empty.
            if compress_file is None:
                return None, "Please compress the file first."
            openai.api_key = api_text
            with tempfile.NamedTemporaryFile(suffix=".srt", delete=False) as out_file:
                out_message = transcribe_audio(compress_file.name, out_file.name)
            return out_file.name, out_message

        submit_btn.click(
            fn=handle_btn_submit,
            inputs=[compress_file, api_text],
            outputs=[subtitle_file, message_text],
        )

    return demo
demo = create_main_tab()
if __name__ == "__main__":
    demo.launch()
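
# Note: launch() serves on localhost by default; launch(share=True) creates a
# temporary public link, which may be useful when running on a remote host.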