Spaces:

visionaries666
/

younes_edition

Running

App Files Files Community

younes21000 committed on Oct 23

Commit

fe11376

•

1 Parent(s): 7e7b0c1

Upload app.py

Browse files

Files changed (1) hide show

app.py +307 -0

app.py ADDED Viewed

	@@ -0,0 +1,307 @@

+import gradio as gr
+import whisper
+import os
+from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+from docx import Document
+from reportlab.pdfgen import canvas
+from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase import pdfmetrics
+from reportlab.lib.pagesizes import A4
+import arabic_reshaper
+from bidi.algorithm import get_display
+from pptx import Presentation
+import subprocess
+import shlex
+import yt_dlp
# Whisper speech-recognition model shared by every transcription request.
# The "tiny" checkpoint is selected to keep transcription fast.
model = whisper.load_model(name="tiny")
# Hugging Face checkpoint used for every translation direction.
_M2M100_CHECKPOINT = "facebook/m2m100_418M"

# Target languages this app can translate into (M2M100 language codes).
_SUPPORTED_TARGET_LANGUAGES = (
    "fa",  # Persian (Farsi)
    "es",  # Spanish
    "fr",  # French
    "de",  # German
    "it",  # Italian
    "pt",  # Portuguese
    "ar",  # Arabic
    "zh",  # Chinese
    "hi",  # Hindi
    "ja",  # Japanese
    "ko",  # Korean
    "ru",  # Russian
)

# The same network serves every language pair, so it is loaded once and then
# reused instead of being re-read from disk on every request.
_translation_model_cache = {}


def load_translation_model(target_language, source_language="en"):
    """Return an M2M100 ``(tokenizer, model)`` pair configured for one direction.

    Args:
        target_language: Language code to translate into; must be one of
            ``_SUPPORTED_TARGET_LANGUAGES``.
        source_language: Language code of the text that will be translated.
            Defaults to ``"en"``, which is what this function always used
            before the parameter existed.

    Returns:
        A ``(tokenizer, translation_model)`` tuple. The tokenizer is created
        fresh for each call (its ``src_lang`` / ``tgt_lang`` are mutated
        here); the model object is shared between calls.

    Raises:
        ValueError: If ``target_language`` is not supported.
    """
    if target_language not in _SUPPORTED_TARGET_LANGUAGES:
        raise ValueError(f"Translation model for {target_language} not supported")

    tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)

    translation_model = _translation_model_cache.get(_M2M100_CHECKPOINT)
    if translation_model is None:
        translation_model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
        _translation_model_cache[_M2M100_CHECKPOINT] = translation_model

    tokenizer.src_lang = source_language
    tokenizer.tgt_lang = target_language
    return tokenizer, translation_model
def translate_text(text, tokenizer, model):
    """Translate ``text`` into the tokenizer's configured target language.

    Args:
        text: The string to translate.
        tokenizer: Callable tokenizer exposing ``tgt_lang``, ``get_lang_id``
            and ``decode`` (an ``M2M100Tokenizer`` in this app).
        model: Object exposing ``generate`` (the M2M100 model in this app).

    Returns:
        The decoded translation. Empty or whitespace-only ``text`` is
        returned unchanged without calling the model.

    Raises:
        RuntimeError: If tokenizing, generating or decoding fails; the
            original exception is attached as ``__cause__``.
    """
    # A blank segment has nothing to translate, so skip generation entirely.
    if not text or not text.strip():
        return text
    try:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # Forcing the first generated token selects the output language.
        target_token_id = tokenizer.get_lang_id(tokenizer.tgt_lang)
        translated = model.generate(**inputs, forced_bos_token_id=target_token_id)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error during translation: {e}") from e
# Helper function to format timestamps in SRT format
def format_timestamp(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split into
    fields. Truncating the fractional part instead loses a millisecond to
    floating-point error (e.g. ``1.001`` would come out as ``00:00:01,000``).
    Negative inputs are clamped to zero.

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
# Write the transcription as an SRT subtitle file
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write ``transcription`` to ``output_file`` in SubRip (SRT) format.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'start'``, ``'end'`` (seconds) and ``'text'``.
        output_file: Path of the ``.srt`` file to create or overwrite.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When not ``None``, each segment's text is
            translated with ``translate_text`` before being written.
    """
    # Always write UTF-8: the platform default encoding may be unable to
    # represent translated (e.g. Persian) text.
    with open(output_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(transcription['segments'], start=1):
            text = segment['text']
            if translation_model is not None:
                text = translate_text(text, tokenizer, translation_model)
            start_time = format_timestamp(segment['start'])
            end_time = format_timestamp(segment['end'])
            # One cue: sequence number, time range, text, blank separator.
            f.write(f"{index}\n{start_time} --> {end_time}\n{text.strip()}\n\n")
def _escape_ffmpeg_filter_value(value):
    """Escape ``value`` for use as a filter option value in a ``-vf`` string."""
    # Level 1: the option value itself (backslash first so it is not doubled
    # again by the replacements that follow).
    for char in ("\\", "'", ":"):
        value = value.replace(char, "\\" + char)
    # Level 2: the surrounding filtergraph description.
    for char in ("\\", "'", "[", "]", ",", ";"):
        value = value.replace(char, "\\" + char)
    return value


# Embedding subtitles into video (hardsub)
def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Burn the subtitles in ``srt_file`` into ``video_file`` using ffmpeg.

    Args:
        video_file: Path of the input video.
        srt_file: Path of the SRT subtitle file to render onto the frames.
        output_video: Path of the re-encoded video to write (overwritten if
            it already exists).

    Raises:
        RuntimeError: If ffmpeg cannot be started, exceeds the 300 second
            timeout, or exits with a non-zero status.
    """
    # Build the argument vector directly so paths containing spaces or quotes
    # never go through shell-style splitting.
    command = [
        "ffmpeg",
        "-nostdin",  # never wait for interactive input
        "-y",        # overwrite a stale output from a previous run
        "-i", video_file,
        "-vf", f"subtitles=filename={_escape_ffmpeg_filter_value(srt_file)}",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        output_video,
    ]
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except (OSError, ValueError) as e:
        raise RuntimeError(f"Error running ffmpeg: {e}") from e
    # Checked outside the try block so this error is not wrapped a second time.
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")
# Helper function to write Word documents
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write the transcription as a numbered list of paragraphs in a .docx file.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'text'``.
        output_file: Path of the ``.docx`` file to write.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When not ``None``, each segment's text is
            translated with ``translate_text`` first.
        target_language: Output language code; ``"fa"`` and ``"ar"`` switch
            the paragraphs to right-to-left layout.
    """
    # Imported here because it is only needed for the RTL markup below.
    from docx.oxml import OxmlElement

    doc = Document()
    rtl = target_language in ("fa", "ar")
    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)
        para = doc.add_paragraph(f"{number}. {text.strip()}")
        if rtl:
            # python-docx exposes no paragraph-level RTL property, so the
            # <w:bidi/> paragraph property is added directly.
            para._p.get_or_add_pPr().append(OxmlElement("w:bidi"))
            for run in para.runs:
                run.font.rtl = True
    doc.save(output_file)
# Helper function to reverse text for RTL
def reverse_text_for_rtl(text):
    """Reverse the characters of each whitespace-separated word in ``text``.

    Word order is kept; the words are re-joined with single spaces.
    """
    flipped_words = []
    for token in text.split():
        flipped_words.append(token[::-1])
    return ' '.join(flipped_words)
def _register_pdf_font(font_name, font_path):
    """Register the TrueType font at ``font_path`` with ReportLab as ``font_name``."""
    if not os.path.exists(font_path):
        raise FileNotFoundError(
            f"{font_name} font file not found at {font_path}. Please ensure it is available."
        )
    try:
        pdfmetrics.registerFont(TTFont(font_name, font_path))
    except Exception as e:
        raise RuntimeError(f"Error registering {font_name} font: {e}.") from e


# Helper function to write PDF documents
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription as numbered lines in an A4 PDF.

    Persian and Arabic output (detected from ``tokenizer.tgt_lang`` when a
    translation model is given) is shaped, reordered for display and
    right-aligned using the B-Nazanin font; everything else is drawn
    left-aligned in Arial. Only the font actually used has to be present
    next to this file. Lines wider than the printable area are wrapped, and
    a new page is started before a line would fall below the bottom margin.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'text'``.
        output_file: Path of the PDF to write.
        tokenizer: Tokenizer passed to ``translate_text`` when translating;
            its ``tgt_lang`` selects font and direction.
        translation_model: When not ``None``, each segment's text is
            translated with ``translate_text`` first.

    Returns:
        ``output_file``.

    Raises:
        FileNotFoundError: If the required font file is missing.
        RuntimeError: If the font file cannot be registered.
    """
    # Imported here because it is only needed for line wrapping.
    from reportlab.lib.utils import simpleSplit

    margin = 50        # points kept free on every page edge
    line_height = 20
    font_size = 12
    page_width, page_height = A4

    # The target language is the same for every segment, so resolve it once.
    target_language = tokenizer.tgt_lang if translation_model is not None else None
    rtl = target_language in ('fa', 'ar')

    # Font files are expected in the directory where app.py is located.
    app_dir = os.path.dirname(os.path.abspath(__file__))
    if rtl:
        font_name, font_file = 'B-Nazanin', 'B-NAZANIN.TTF'
    else:
        font_name, font_file = 'Arial', 'Arial.ttf'
    _register_pdf_font(font_name, os.path.join(app_dir, font_file))

    c = canvas.Canvas(output_file, pagesize=A4)
    c.setFont(font_name, font_size)
    y_position = page_height - margin
    max_width = page_width - 2 * margin

    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)
        line = f"{number}. {text.strip()}"

        # Wrap on the logical (unshaped) text; for RTL the measured widths
        # are therefore approximate, which is acceptable for wrapping.
        for piece in simpleSplit(line, font_name, font_size, max_width) or [line]:
            # Break the page BEFORE drawing so nothing lands in the margin.
            if y_position < margin:
                c.showPage()
                # showPage resets the canvas state, including the font.
                c.setFont(font_name, font_size)
                y_position = page_height - margin
            if rtl:
                shaped = get_display(arabic_reshaper.reshape(piece))
                c.drawRightString(page_width - margin, y_position, shaped)
            else:
                c.drawString(margin, y_position, piece)
            y_position -= line_height

    c.save()
    return output_file
def _group_lines_into_slides(lines, max_chars):
    """Greedily pack ``lines`` into chunks of at most ``max_chars`` characters.

    A single line longer than ``max_chars`` gets a chunk of its own.
    """
    pages = []
    buffer = ""
    for line in lines:
        # Only flush a non-empty buffer; otherwise an over-long first line
        # would produce an empty slide.
        if buffer and len(buffer) + len(line) > max_chars:
            pages.append(buffer.strip())
            buffer = line
        else:
            buffer += line
    if buffer:
        pages.append(buffer.strip())
    return pages


# Helper function to write PowerPoint slides
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription as numbered lines spread over PowerPoint slides.

    Lines are accumulated until a slide would exceed 400 characters, then a
    new slide is started. Every slide with text is titled "Transcription".

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'text'``.
        output_file: Path of the ``.pptx`` file to write.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When not ``None``, each segment's text is
            translated with ``translate_text`` first.
    """
    ppt = Presentation()
    layout = ppt.slide_layouts[5]
    max_chars_per_slide = 400

    lines = []
    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)
        lines.append(f"{number}. {text.strip()}\n")

    pages = _group_lines_into_slides(lines, max_chars_per_slide)
    if not pages:
        # Keep the previous behaviour of always producing at least one slide.
        ppt.slides.add_slide(layout)

    for page_text in pages:
        slide = ppt.slides.add_slide(layout)
        slide.shapes.title.text = "Transcription"
        textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
        # Wrap inside the box instead of running off the slide edge.
        textbox.text_frame.word_wrap = True
        textbox.text_frame.text = page_text

    ppt.save(output_file)
# Function to download YouTube video
def download_youtube_video(url):
    """Download the video at ``url`` as MP4 and return the local file path.

    Each call downloads into its own freshly created temporary directory.
    With a fixed output name in the working directory, a file left over from
    an earlier request would be reused instead of downloading the new URL,
    and concurrent requests would overwrite each other.

    Args:
        url: Video URL understood by yt-dlp.

    Returns:
        Absolute path of the downloaded ``downloaded_video.mp4``.

    Raises:
        RuntimeError: If yt-dlp finishes without producing the expected file.
    """
    # Imported here because it is only needed by this function.
    import tempfile

    download_dir = tempfile.mkdtemp(prefix="yt_download_")
    output_path = os.path.join(download_dir, 'downloaded_video.mp4')
    ydl_opts = {
        'format': 'mp4',
        # '%' starts a template field in outtmpl, so escape literal ones.
        'outtmpl': output_path.replace('%', '%%'),
        # NOTE(review): this disables TLS certificate verification. It is kept
        # to preserve existing behaviour, but should be removed if the
        # deployment environment has a working CA bundle.
        'nocheckcertificate': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    if not os.path.exists(output_path):
        raise RuntimeError(f"Download finished but {output_path} was not created.")
    return output_path
# Transcribing video and generating output
def transcribe_video(video_file, video_url, language, target_language, output_format):
    """Transcribe a video, optionally translate it, and export one output file.

    Args:
        video_file: Uploaded file; either an object with a ``name`` attribute
            holding the path, or the path itself. Ignored when ``video_url``
            is given.
        video_url: Optional URL to download the video from instead.
        language: Spoken language code passed to Whisper.
        target_language: Language code for the subtitles. If it equals
            ``language`` nothing is translated; ``"en"`` from another
            language uses Whisper's own translate task; anything else is
            translated with M2M100.
        output_format: One of ``"SRT"``, ``"Video with Hardsub"``,
            ``"Word"``, ``"PDF"`` or ``"PowerPoint"``.

    Returns:
        Path of the generated file.

    Raises:
        ValueError: If no input video is supplied or ``output_format`` is
            unknown.
        RuntimeError: If loading the translation model or burning in the
            subtitles fails.
    """
    if video_url and video_url.strip():
        video_file_path = download_youtube_video(video_url.strip())
    elif video_file is not None:
        # Depending on the Gradio version the upload is a path string or a
        # file wrapper exposing the path as `.name`.
        video_file_path = getattr(video_file, "name", video_file)
    else:
        raise ValueError("Please upload a video file or provide a YouTube URL.")

    # Whisper can translate speech into English itself, so no separate
    # translation model is needed for a non-English source with target "en".
    whisper_translates = target_language == "en" and language != "en"
    task = "translate" if whisper_translates else "transcribe"
    result = model.transcribe(video_file_path, language=language, task=task)
    video_name = os.path.splitext(video_file_path)[0]

    if target_language in (language, "en"):
        tokenizer, translation_model = None, None
    else:
        try:
            tokenizer, translation_model = load_translation_model(target_language)
        except Exception as e:
            raise RuntimeError(f"Error loading translation model: {e}") from e
        if language:
            # Translate from the language that was actually spoken rather
            # than assuming English.
            tokenizer.src_lang = language

    # The SRT file is always written: it is the hardsub input as well as an
    # output format of its own.
    srt_file = f"{video_name}.srt"
    write_srt(result, srt_file, tokenizer, translation_model)

    if output_format == "SRT":
        return srt_file
    if output_format == "Video with Hardsub":
        output_video = f"{video_name}_with_subtitles.mp4"
        try:
            embed_hardsub_in_video(video_file_path, srt_file, output_video)
        except Exception as e:
            raise RuntimeError(f"Error embedding subtitles in video: {e}") from e
        return output_video
    if output_format == "Word":
        word_file = f"{video_name}.docx"
        write_word(result, word_file, tokenizer, translation_model, target_language)
        return word_file
    if output_format == "PDF":
        pdf_file = f"{video_name}.pdf"
        write_pdf(result, pdf_file, tokenizer, translation_model)
        return pdf_file
    if output_format == "PowerPoint":
        ppt_file = f"{video_name}.pptx"
        write_ppt(result, ppt_file, tokenizer, translation_model)
        return ppt_file
    raise ValueError(f"Unsupported output format: {output_format}")
# Gradio interface: input widgets are built first, in the positional order
# expected by transcribe_video, then handed to gr.Interface.
_video_upload = gr.File(label="Upload Video File (or leave empty for YouTube link)")
_youtube_url = gr.Textbox(
    label="YouTube Video URL (optional)",
    placeholder="https://www.youtube.com/watch?v=...",
)
_source_language = gr.Dropdown(
    label="Select Original Video Language",
    choices=["en", "es", "fr", "de", "it", "pt"],
    value="en",
)
_subtitle_language = gr.Dropdown(
    label="Select Subtitle Translation Language",
    choices=["en", "fa", "es", "de", "fr", "it", "pt"],
    value="fa",
)
_output_format = gr.Radio(
    label="Choose Output Format",
    choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"],
    value="Video with Hardsub",
)

_description = "".join([
    "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, ",
    "translate the subtitles into multiple languages using M2M100, and export them ",
    "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint.",
])

iface = gr.Interface(
    fn=transcribe_video,
    inputs=[_video_upload, _youtube_url, _source_language, _subtitle_language, _output_format],
    outputs=gr.File(label="Download File"),
    title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
    description=_description,
    theme="compact",
    live=False,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()