Spaces:

nelikCode
/

AudioVisualTranscription

Running

App Files Files Community

nelikCode committed on Nov 12, 2024

Commit

1044a67

verified ·

1 Parent(s): 83a90e2

chore: code refactor

Browse files

Files changed (1) hide show

app.py +57 -15

app.py CHANGED Viewed

@@ -6,14 +6,19 @@ import whisper
 from moviepy.editor import (
     AudioFileClip,
     ColorClip,
-    CompositeVideoClip,
     VideoFileClip,
     concatenate_videoclips,
 )
-from moviepy.video.VideoClip import TextClip
-def generate_srt_file(transcription_result, srt_file_path, lag=0):
     with open(srt_file_path, "w") as file:
         for i, segment in enumerate(transcription_result["segments"], start=1):
             # Adjusting times for lag
@@ -28,7 +33,17 @@ def generate_srt_file(transcription_result, srt_file_path, lag=0):
             file.write(f"{i}\n{start_srt} --> {end_srt}\n{text}\n\n")
-def get_srt_filename(video_path, audio_path):
     if video_path is not None:
         return os.path.splitext(os.path.basename(video_path))[0] + ".srt"
     else:
@@ -36,14 +51,33 @@ def get_srt_filename(video_path, audio_path):
 def generate_video(
-    audio_path, video_path, input, language, lag, progress=gr.Progress(track_tqdm=True)
-):
     if audio_path is None and video_path is None:
-        raise ValueError("Please upload an audio or video file.")
     if input == "Video" and video_path is None:
-        raise ValueError("Please upload a video file.")
     if input == "Audio" and audio_path is None:
-        raise ValueError("Please upload an audio file.")
     progress(0.0, "Checking input...")
     if input == "Video":
         progress(0.0, "Extracting audio from video...")
@@ -55,7 +89,7 @@ def generate_video(
     # Transcribe audio
     progress(0.1, "Transcribing audio...")
-    result = model.transcribe(audio_path, language=language)
     progress(0.30, "Audio transcribed!")
     # Generate SRT file
@@ -72,7 +106,6 @@ def generate_video(
         else:
             # we simply extend the original video with a black screen at the end of duration lag
             video = VideoFileClip(video_path)
-            fps = video.fps
             black_screen = ColorClip(
                 size=video.size, color=(0, 0, 0), duration=lag
             ).set_fps(1)
@@ -96,7 +129,17 @@ def generate_video(
         return output_video_path, srt_file_path
-def download_srt(audio_input, video_input):
     srt_file_path = get_srt_filename(video_input, audio_input)
     if os.path.exists(srt_file_path):
         return srt_file_path
@@ -106,9 +149,8 @@ def download_srt(audio_input, video_input):
 if __name__ == "__main__":
     DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-    model = whisper.load_model("base", device=DEVICE)
-    # Gradio Blocks implementation
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(
             """
@@ -176,4 +218,4 @@ if __name__ == "__main__":
             outputs=srt_file_output,
         )
-    demo.launch()

 from moviepy.editor import (
     AudioFileClip,
     ColorClip,
     VideoFileClip,
     concatenate_videoclips,
 )
+def generate_srt_file(transcription_result: dict, srt_file_path: str, lag=0) -> None:
+    """
+    Write and save an SRT file from the transcription result.
+    Args:
+        transcription_result: The transcription result from Whisper model.
+        srt_file_path: The path to save the SRT file.
+    """
     with open(srt_file_path, "w") as file:
         for i, segment in enumerate(transcription_result["segments"], start=1):
             # Adjusting times for lag
             file.write(f"{i}\n{start_srt} --> {end_srt}\n{text}\n\n")
+def get_srt_filename(video_path: str, audio_path: str = None) -> str:
+    """
+    Get the SRT filename based on the input video or audio file.
+    Args:
+        video_path: The path to the video file.
+        audio_path: The path to the audio file.
+    Returns:
+        The SRT filename.
+    """
     if video_path is not None:
         return os.path.splitext(os.path.basename(video_path))[0] + ".srt"
     else:
 def generate_video(
+    audio_path: str,
+    video_path: str,
+    input: str,
+    language: str,
+    lag: int,
+    progress: gr.Progress = gr.Progress(track_tqdm=True),
+) -> tuple[str, str]:
+    """
+    Generate a subtitled video from the input audio or video file.
+    Args:
+        audio_path: The path to the audio file.
+        video_path: The path to the video file.
+        input: The type of input file (audio or video).
+        language: The language code for transcription.
+        lag: The lag time in seconds to delay the transcription.
+        progress: The progress bar to show the progress of the task.
+    Returns:
+        The path to the generated video file and the SRT file.
+    """
     if audio_path is None and video_path is None:
+        raise gr.Error("Please upload an audio or video file.")
     if input == "Video" and video_path is None:
+        raise gr.Error("Please upload a video file.")
     if input == "Audio" and audio_path is None:
+        raise gr.Error("Please upload an audio file.")
     progress(0.0, "Checking input...")
     if input == "Video":
         progress(0.0, "Extracting audio from video...")
     # Transcribe audio
     progress(0.1, "Transcribing audio...")
+    result = MODEL.transcribe(audio_path, language=language)
     progress(0.30, "Audio transcribed!")
     # Generate SRT file
         else:
             # we simply extend the original video with a black screen at the end of duration lag
             video = VideoFileClip(video_path)
             black_screen = ColorClip(
                 size=video.size, color=(0, 0, 0), duration=lag
             ).set_fps(1)
         return output_video_path, srt_file_path
+def download_srt(audio_input: str, video_input: str) -> str:
+    """
+    Download the SRT file based on the input audio or video file.
+    Args:
+        audio_input: The path to the audio file.
+        video_input: The path to the video file.
+    Returns:
+        The path to the downloaded SRT file.
+    """
     srt_file_path = get_srt_filename(video_input, audio_input)
     if os.path.exists(srt_file_path):
         return srt_file_path
 if __name__ == "__main__":
     DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+    MODEL = whisper.load_model("base", device=DEVICE)
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(
             """
             outputs=srt_file_output,
         )
+    demo.launch()