Spaces:

ChiBenevisamPas
/

Video-Subtitle-Translate

Runtime error

App Files Files Community

ChiBenevisamPas committed on Oct 16, 2024

Commit

daf618a

verified ·

1 Parent(s): 1e62147

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -52

app.py CHANGED Viewed

@@ -2,14 +2,15 @@ import gradio as gr
 import whisper
 import os
 from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
-from docx import Document  # For Word output
-from fpdf import FPDF  # For PDF output
-from pptx import Presentation  # For PowerPoint output
-import subprocess  # To use ffmpeg for embedding subtitles
-import shlex  # For better command-line argument handling
-# Load the Whisper model
-model = whisper.load_model("tiny")  # Smaller model for faster transcription
 # Load M2M100 translation model for different languages
 def load_translation_model(target_language):
@@ -17,12 +18,20 @@ def load_translation_model(target_language):
         "fa": "fa",  # Persian (Farsi)
         "es": "es",  # Spanish
         "fr": "fr",  # French
     }
     target_lang_code = lang_codes.get(target_language)
     if not target_lang_code:
         raise ValueError(f"Translation model for {target_language} not supported")
-    # Load M2M100 model and tokenizer
     tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
     translation_model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
@@ -39,7 +48,7 @@ def translate_text(text, tokenizer, model):
     except Exception as e:
         raise RuntimeError(f"Error during translation: {e}")
-# Helper function to format timestamps in SRT format (hh:mm:ss,ms)
 def format_timestamp(seconds):
     milliseconds = int((seconds % 1) * 1000)
     seconds = int(seconds)
@@ -66,73 +75,81 @@ def write_srt(transcription, output_file, tokenizer=None, translation_model=None
             f.write(f"{start_time} --> {end_time}\n")
             f.write(f"{text.strip()}\n\n")
 def embed_hardsub_in_video(video_file, srt_file, output_video):
-    """Uses ffmpeg to burn subtitles into the video (hardsub)."""
     command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
     try:
-        print(f"Running command: {command}")  # Debug statement
         process = subprocess.run(shlex.split(command), capture_output=True, text=True, timeout=300)
-        print(f"ffmpeg output: {process.stdout}")  # Debug statement
         if process.returncode != 0:
-            raise RuntimeError(f"ffmpeg error: {process.stderr}")  # Print the error
     except subprocess.TimeoutExpired:
         raise RuntimeError("ffmpeg process timed out.")
     except Exception as e:
         raise RuntimeError(f"Error running ffmpeg: {e}")
-def write_word(transcription, output_file, tokenizer=None, translation_model=None):
-    """Creates a Word document from the transcription without timestamps."""
     doc = Document()
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
-        doc.add_paragraph(f"{i + 1}. {text.strip()}")
     doc.save(output_file)
 def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
-    """Creates a PDF document from the transcription without timestamps."""
     pdf = FPDF()
-    pdf.set_auto_page_break(auto=True, margin=15)
     pdf.add_page()
-    pdf.set_font("Arial", size=12)
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
-        pdf.multi_cell(0, 10, f"{i + 1}. {text.strip()}")
     pdf.output(output_file)
 def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
-    """Creates a PowerPoint presentation from the transcription without timestamps."""
     ppt = Presentation()
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
-        slide = ppt.slides.add_slide(ppt.slide_layouts[5])  # Blank slide
         title = slide.shapes.title
         title.text = f"{i + 1}. {text.strip()}"
     ppt.save(output_file)
-def transcribe_video(video_file, language, target_language, output_format):
-    # Transcribe the video with Whisper
-    result = model.transcribe(video_file.name, language=language)
-    video_name = os.path.splitext(video_file.name)[0]
-    # Load the translation model for the selected subtitle language
     if target_language != "en":
         try:
             tokenizer, translation_model = load_translation_model(target_language)
@@ -141,23 +158,21 @@ def transcribe_video(video_file, language, target_language, output_format):
     else:
         tokenizer, translation_model = None, None
-    # Save the SRT file
     srt_file = f"{video_name}.srt"
     write_srt(result, srt_file, tokenizer, translation_model)
-    # Output based on user's selection
     if output_format == "SRT":
         return srt_file
     elif output_format == "Video with Hardsub":
         output_video = f"{video_name}_with_subtitles.mp4"
         try:
-            embed_hardsub_in_video(video_file.name, srt_file, output_video)
             return output_video
         except Exception as e:
             raise RuntimeError(f"Error embedding subtitles in video: {e}")
     elif output_format == "Word":
         word_file = f"{video_name}.docx"
-        write_word(result, word_file, tokenizer, translation_model)
         return word_file
     elif output_format == "PDF":
         pdf_file = f"{video_name}.pdf"
@@ -168,19 +183,28 @@ def transcribe_video(video_file, language, target_language, output_format):
         write_ppt(result, ppt_file, tokenizer, translation_model)
         return ppt_file
-# Gradio interface
 iface = gr.Interface(
     fn=transcribe_video,
     inputs=[
-        gr.File(label="Upload Video"),
-        gr.Dropdown(label="Select Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
-        gr.Dropdown(label="Select Subtitle Language", choices=["en", "fa", "es", "fr"], value="fa"),
-        gr.Radio(label="Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
     ],
-    outputs=gr.File(label="Download Subtitles, Video, or Document"),
-    title="Video Subtitle Generator with Hardsub and Document Formats",
-    description="Upload a video file to generate subtitles in SRT format, download the video with hardsubbed subtitles, or generate Word, PDF, or PowerPoint documents using Whisper and M2M100 for translation."
 )
 if __name__ == "__main__":
-    iface.launch()

 import whisper
 import os
 from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+from docx import Document
+from fpdf import FPDF
+from pptx import Presentation
+import subprocess
+import shlex
+import yt_dlp
+# Load the Whisper model (smaller model for faster transcription)
+model = whisper.load_model("tiny")
 # Load M2M100 translation model for different languages
 def load_translation_model(target_language):
         "fa": "fa",  # Persian (Farsi)
         "es": "es",  # Spanish
         "fr": "fr",  # French
+        "de": "de",  # German
+        "it": "it",  # Italian
+        "pt": "pt",  # Portuguese
+        "ar": "ar",  # Arabic
+        "zh": "zh",  # Chinese
+        "hi": "hi",  # Hindi
+        "ja": "ja",  # Japanese
+        "ko": "ko",  # Korean
+        "ru": "ru",  # Russian
     }
     target_lang_code = lang_codes.get(target_language)
     if not target_lang_code:
         raise ValueError(f"Translation model for {target_language} not supported")
     tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
     translation_model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
     except Exception as e:
         raise RuntimeError(f"Error during translation: {e}")
+# Helper function to format timestamps in SRT format
 def format_timestamp(seconds):
     milliseconds = int((seconds % 1) * 1000)
     seconds = int(seconds)
             f.write(f"{start_time} --> {end_time}\n")
             f.write(f"{text.strip()}\n\n")
# Embedding subtitles into video (hardsub)
def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Burn the subtitles in ``srt_file`` into ``video_file`` using ffmpeg.

    The video stream is re-encoded with libx264 (CRF 23, ``medium`` preset)
    and the result is written to ``output_video``.

    Args:
        video_file: Path of the input video.
        srt_file: Path of the SRT subtitle file to render onto the frames.
        output_video: Path the re-encoded video is written to.

    Raises:
        RuntimeError: If ffmpeg cannot be started, exceeds the 300 second
            timeout, or exits with a non-zero status (stderr is included
            in the message).
    """
    # Pass an argument list instead of formatting a quoted string and
    # re-splitting it with shlex: a quote character in an uploaded file name
    # can no longer break or alter the command line.
    command = [
        "ffmpeg",
        "-y",  # overwrite a stale output from an earlier run instead of prompting
        "-i", video_file,
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        output_video,
    ]
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError("ffmpeg process timed out.") from exc
    except Exception as exc:
        # Boundary handler: callers only expect RuntimeError from this helper.
        raise RuntimeError(f"Error running ffmpeg: {exc}") from exc
    # Checked outside the try block so this error is not caught and re-wrapped.
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")
# Helper function to write Word documents
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write the transcription to a Word document, one numbered paragraph per segment.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment is a
            mapping with a ``'text'`` entry.
        output_file: Path the ``.docx`` file is saved to.
        tokenizer: Tokenizer handed to ``translate_text`` when translating.
        translation_model: When truthy, every segment is translated with
            ``translate_text`` before being written.
        target_language: Language code of the written text; right-to-left
            languages get RTL paragraph and run formatting.
    """
    # Arabic is offered by the translation table alongside Persian, so it must
    # be laid out right-to-left as well.
    rtl = target_language in ("fa", "ar", "he", "ur")
    if rtl:
        # python-docx documents no paragraph-level right-to-left property
        # (only ``Font.rtl`` on runs), so the ``w:bidi`` flag is added to the
        # paragraph properties XML directly.
        from docx.oxml import OxmlElement

    doc = Document()
    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        para = doc.add_paragraph(f"{number}. {text.strip()}")
        if rtl:
            para._p.get_or_add_pPr().append(OxmlElement('w:bidi'))
            for run in para.runs:
                run.font.rtl = True
    doc.save(output_file)
# Helper function to reverse text for RTL
def reverse_text_for_rtl(text):
    """Reverse the characters of each right-to-left word in ``text``.

    The text is split on whitespace and re-joined with single spaces. A word
    is reversed only when it contains at least one character whose Unicode
    bidirectional class is ``R`` or ``AL`` (Hebrew, Arabic, Persian, ...).
    Numbers and Latin words are left as they are, so ``"2024"`` is no longer
    turned into ``"4202"`` and left-to-right text passes through unchanged.

    Args:
        text: The string to process.

    Returns:
        The processed string; word order is preserved.
    """
    import unicodedata

    def has_rtl_char(word):
        return any(unicodedata.bidirectional(ch) in ("R", "AL") for ch in word)

    return ' '.join(
        word[::-1] if has_rtl_char(word) else word
        for word in text.split()
    )
# Helper function to write PDF documents
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None,
              target_language=None, font_path=None):
    """Write the transcription to a PDF, one numbered entry per segment.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment is a
            mapping with a ``'text'`` entry.
        output_file: Path the PDF is written to.
        tokenizer: Tokenizer handed to ``translate_text`` when translating.
        translation_model: When truthy, every segment is translated with
            ``translate_text`` before being written.
        target_language: Language code of the written text. ``None`` keeps the
            previous behaviour (text is treated as right-to-left); a
            left-to-right code skips the reversal and aligns left.
        font_path: Path of the TrueType font to embed. Defaults to the
            B-NAZANIN font in the application directory.
    """
    if font_path is None:
        font_path = "/home/user/app/B-NAZANIN.TTF"
    # Same RTL language set as write_word; None preserves the old RTL layout.
    rtl = target_language is None or target_language in ("fa", "ar", "he", "ur")
    alignment = 'R' if rtl else 'L'

    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('B-NAZANIN', '', font_path, uni=True)
    pdf.set_font('B-NAZANIN', size=12)
    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        if rtl:
            # The words are pre-reversed by reverse_text_for_rtl before being
            # drawn right-aligned.
            text = reverse_text_for_rtl(text)
        pdf.multi_cell(0, 10, f"{number}. {text.strip()}", align=alignment)
    pdf.output(output_file)
# Helper function to write PowerPoint slides
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Save a presentation holding one slide per transcription segment.

    Each slide is created from layout index 5 of the default template and its
    title is set to the numbered (and, when a translation model is given,
    translated) segment text.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment is a
            mapping with a ``'text'`` entry.
        output_file: Path the presentation is saved to.
        tokenizer: Tokenizer handed to ``translate_text`` when translating.
        translation_model: When truthy, every segment is translated with
            ``translate_text`` before being written.
    """
    deck = Presentation()
    for number, segment in enumerate(transcription['segments'], start=1):
        caption = segment['text']
        if translation_model:
            caption = translate_text(caption, tokenizer, translation_model)
        new_slide = deck.slides.add_slide(deck.slide_layouts[5])
        new_slide.shapes.title.text = f"{number}. {caption.strip()}"
    deck.save(output_file)
# Function to download YouTube video
def download_youtube_video(url):
    """Download the video at ``url`` as MP4 and return the local file path.

    Every call downloads into its own freshly created temporary directory.
    With a fixed file name in the working directory, yt-dlp would skip the
    download whenever a file from an earlier request already existed (handing
    back the previous video), and concurrent requests would overwrite each
    other.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the downloaded ``downloaded_video.mp4`` file. The containing
        temporary directory is not removed here because the caller derives
        its output file names from this path.

    Raises:
        ValueError: If ``url`` is empty or only whitespace.
    """
    import tempfile

    if not url or not url.strip():
        raise ValueError("A video URL is required")

    download_dir = tempfile.mkdtemp(prefix="yt_download_")
    output_path = os.path.join(download_dir, 'downloaded_video.mp4')
    ydl_opts = {
        'format': 'mp4',
        'outtmpl': output_path,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return output_path
+# Transcribing video and generating output
+def transcribe_video(video_file, video_url, language, target_language, output_format):
+    if video_url:
+        video_file_path = download_youtube_video(video_url)
+    else:
+        video_file_path = video_file.name
+    result = model.transcribe(video_file_path, language=language)
+    video_name = os.path.splitext(video_file_path)[0]
     if target_language != "en":
         try:
             tokenizer, translation_model = load_translation_model(target_language)
     else:
         tokenizer, translation_model = None, None
     srt_file = f"{video_name}.srt"
     write_srt(result, srt_file, tokenizer, translation_model)
     if output_format == "SRT":
         return srt_file
     elif output_format == "Video with Hardsub":
         output_video = f"{video_name}_with_subtitles.mp4"
         try:
+            embed_hardsub_in_video(video_file_path, srt_file, output_video)
             return output_video
         except Exception as e:
             raise RuntimeError(f"Error embedding subtitles in video: {e}")
     elif output_format == "Word":
         word_file = f"{video_name}.docx"
+        write_word(result, word_file, tokenizer, translation_model, target_language)
         return word_file
     elif output_format == "PDF":
         pdf_file = f"{video_name}.pdf"
         write_ppt(result, ppt_file, tokenizer, translation_model)
         return ppt_file
+# Gradio interface with YouTube URL
 iface = gr.Interface(
     fn=transcribe_video,
     inputs=[
+        gr.File(label="Upload Video File (or leave empty for YouTube link)"),  # Removed 'optional=True'
+        gr.Textbox(label="YouTube Video URL (optional)", placeholder="https://www.youtube.com/watch?v=..."),
+        gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
+        gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "es", "de", "fr", "it", "pt"], value="fa"),
+        gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
     ],
+    outputs=gr.File(label="Download File"),
+    title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
+    description=(
+        "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, "
+        "translate the subtitles into multiple languages using M2M100, and export them "
+        "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
+    ),
+    theme="compact",
+    live=False
 )
 if __name__ == "__main__":
+    iface.launch()