import os
import shlex
import subprocess
import unicodedata

import gradio as gr
import whisper
from docx import Document
from docx.oxml import OxmlElement
from fpdf import FPDF
from pptx import Presentation
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Load the Whisper model once at import time; transcribe_video() reuses this
# module-level instance. The "tiny" checkpoint is chosen (smaller model for
# faster transcription).
model = whisper.load_model("tiny")

# Load M2M100 translation model for different languages
def load_translation_model(target_language, source_language="en"):
    """Load the M2M100 tokenizer and model configured for one language pair.

    Args:
        target_language: Language code to translate into. Only "fa"
            (Persian), "es" (Spanish) and "fr" (French) are accepted.
        source_language: Language code of the text that will be translated.
            Defaults to "en", which was previously the only possible value.

    Returns:
        A ``(tokenizer, translation_model)`` tuple. The tokenizer has its
        ``src_lang`` and ``tgt_lang`` attributes already set.

    Raises:
        ValueError: If ``target_language`` is not one of the accepted codes.
    """
    supported_targets = ("fa", "es", "fr")
    # Validate before loading so an unsupported request fails fast.
    if target_language not in supported_targets:
        raise ValueError(f"Translation model for {target_language} not supported")

    checkpoint = "facebook/m2m100_418M"
    tokenizer = M2M100Tokenizer.from_pretrained(checkpoint)
    translation_model = M2M100ForConditionalGeneration.from_pretrained(checkpoint)

    tokenizer.src_lang = source_language
    tokenizer.tgt_lang = target_language

    return tokenizer, translation_model

def translate_text(text, tokenizer, model):
    """Translate one piece of text with the given tokenizer/model pair.

    Args:
        text: The text to translate.
        tokenizer: Tokenizer whose ``tgt_lang`` selects the output language.
        model: Generation model used to produce the translation.

    Returns:
        The decoded translation. Empty or whitespace-only input is returned
        unchanged without calling the model.

    Raises:
        RuntimeError: If tokenization, generation or decoding fails; the
            original exception is attached as ``__cause__``.
    """
    # There is nothing to translate in a blank segment, so skip the model.
    if isinstance(text, str) and not text.strip():
        return text
    try:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        translated = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.get_lang_id(tokenizer.tgt_lang),
        )
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error during translation: {e}") from e

# Helper function to format timestamps in SRT format
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split into
    fields. Truncating the fractional part instead loses a millisecond to
    floating-point error (for example 1.001 would become ``00:00:01,000``).

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The timestamp string. Negative offsets are clamped to zero.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

# Write the transcription segments to a SubRip (.srt) subtitle file
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write every transcription segment as a numbered SRT cue.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'start'``, ``'end'`` and ``'text'``.
        output_file: Path of the ``.srt`` file to create or overwrite.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When given, each segment's text is translated
            before being written.
    """
    # Build all cues first so a translation failure does not leave a
    # half-written subtitle file behind.
    cues = []
    for index, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)

        start_time = format_timestamp(segment['start'])
        end_time = format_timestamp(segment['end'])
        cues.append(f"{index}\n{start_time} --> {end_time}\n{text.strip()}\n\n")

    # Explicit UTF-8: translated text (e.g. Persian) cannot be encoded with
    # every platform's default locale encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(cues)

# Embedding subtitles into video (hardsub)
def embed_hardsub_in_video(video_file, srt_file, output_video, timeout=300):
    """Burn an SRT subtitle file into a video by re-encoding it with ffmpeg.

    Args:
        video_file: Path of the input video.
        srt_file: Path of the subtitle file rendered by the ``subtitles``
            filter.
        output_video: Path of the re-encoded video; an existing file is
            overwritten.
        timeout: Maximum number of seconds ffmpeg may run.

    Raises:
        RuntimeError: If ffmpeg cannot be started, exceeds ``timeout``, or
            exits with a non-zero status (its stderr is in the message).
    """
    # Pass an argument list rather than a shell-style string so paths with
    # spaces, quotes or backslashes reach ffmpeg unchanged.
    # NOTE(review): the path is still embedded in the filter expression, so a
    # path containing a single quote would presumably break it - confirm.
    command = [
        "ffmpeg",
        "-y",  # overwrite instead of prompting when the output already exists
        "-i", video_file,
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        output_video,
    ]
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except (OSError, ValueError) as e:
        raise RuntimeError(f"Error running ffmpeg: {e}") from e

    # Checked outside the try block so this error is not caught and wrapped
    # a second time.
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")

# Helper function to write Word documents
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write every transcription segment as a numbered paragraph in a .docx file.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the ``.docx`` file to save.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When given, each segment's text is translated
            before being written.
        target_language: Optional language code. A right-to-left language
            code forces right-to-left paragraphs; otherwise the direction is
            inferred from the characters of each paragraph.
    """
    rtl_languages = ("fa", "ar", "he", "ur")
    doc = Document()
    for i, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)
        para = doc.add_paragraph(f"{i}. {text.strip()}")

        # Bidirectional classes "R" and "AL" mark Hebrew- and Arabic-script
        # letters, so the direction is detected even without a language code.
        rtl = target_language in rtl_languages or any(
            unicodedata.bidirectional(ch) in ("R", "AL") for ch in text
        )
        if rtl:
            # ParagraphFormat has no right-to-left property, so the w:bidi
            # paragraph flag is added to the XML directly. It is appended to
            # a freshly created paragraph before any other property is set.
            para._p.get_or_add_pPr().append(OxmlElement("w:bidi"))
            for run in para.runs:
                run.font.rtl = True
    doc.save(output_file)

# Helper function to reverse text for RTL
def reverse_text_for_rtl(text):
    """Reverse the characters inside each word while keeping the word order.

    Words are split on any run of whitespace and re-joined with single
    spaces, so leading, trailing and repeated whitespace is dropped.
    """
    flipped_words = []
    for token in text.split():
        flipped_words.append(token[::-1])
    return ' '.join(flipped_words)

# Helper function to write PDF documents
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None,
              font_path="/home/user/app/B-NAZANIN.TTF"):
    """Write every transcription segment as a numbered line in a PDF file.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the PDF file to write.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When given, each segment's text is translated
            before being written.
        font_path: TrueType font file registered for the document. Defaults
            to the path that was previously hard-coded.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('B-NAZANIN', '', font_path, uni=True)
    pdf.set_font('B-NAZANIN', size=12)
    for i, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)

        # Only text that contains right-to-left letters (bidirectional
        # classes "R"/"AL") is reversed and right-aligned; reversing Latin
        # text would garble it.
        # NOTE(review): per-word reversal is only an approximation of RTL
        # layout - confirm the rendered output.
        if any(unicodedata.bidirectional(ch) in ("R", "AL") for ch in text):
            line, alignment = reverse_text_for_rtl(text), 'R'
        else:
            line, alignment = text, 'L'
        pdf.multi_cell(0, 10, f"{i}. {line.strip()}", align=alignment)
    pdf.output(output_file)

# Helper function to write PowerPoint slides
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Create a presentation with one slide per transcription segment.

    Each slide uses slide layout index 5 and carries the numbered segment
    text in its title shape. When ``translation_model`` is given, the text
    is translated with ``translate_text`` first. The presentation is saved
    to ``output_file``.
    """
    deck = Presentation()
    for number, segment in enumerate(transcription['segments'], start=1):
        caption = segment['text']
        if translation_model:
            caption = translate_text(caption, tokenizer, translation_model)
        new_slide = deck.slides.add_slide(deck.slide_layouts[5])
        heading = new_slide.shapes.title
        heading.text = f"{number}. {caption.strip()}"
    deck.save(output_file)

# Transcribing video and generating output
def transcribe_video(video_file, language, target_language, output_format):
    """Transcribe an uploaded video and export subtitles in the chosen format.

    Args:
        video_file: The uploaded video, either an object exposing the file
            path as ``.name`` or the path string itself.
        language: Language code of the speech in the video.
        target_language: Language code the subtitles should be written in.
        output_format: One of "SRT", "Video with Hardsub", "Word", "PDF" or
            "PowerPoint".

    Returns:
        Path of the generated file, created next to the uploaded video.

    Raises:
        ValueError: If no file was provided or ``output_format`` is unknown.
        RuntimeError: If the translation model cannot be loaded or the
            subtitles cannot be embedded in the video.
    """
    if video_file is None:
        raise ValueError("No video file was provided")
    # Accept both an object with a ``.name`` path and a plain path string.
    video_path = getattr(video_file, "name", video_file)

    # Whisper can translate speech directly into English, so use that task
    # when English subtitles are requested for non-English audio.
    task = "translate" if target_language == "en" and language != "en" else "transcribe"
    result = model.transcribe(video_path, language=language, task=task)
    video_name = os.path.splitext(video_path)[0]

    # The transcript is in the spoken language, which is not always English.
    source_language = language or result.get("language") or "en"
    if target_language not in ("en", source_language):
        try:
            tokenizer, translation_model = load_translation_model(target_language)
            tokenizer.src_lang = source_language
        except Exception as e:
            raise RuntimeError(f"Error loading translation model: {e}") from e
    else:
        tokenizer, translation_model = None, None

    # The SRT file is only produced for the formats that use it, so the
    # segments are not translated twice for the other formats.
    if output_format in ("SRT", "Video with Hardsub"):
        srt_file = f"{video_name}.srt"
        write_srt(result, srt_file, tokenizer, translation_model)
        if output_format == "SRT":
            return srt_file
        output_video = f"{video_name}_with_subtitles.mp4"
        try:
            embed_hardsub_in_video(video_path, srt_file, output_video)
        except Exception as e:
            raise RuntimeError(f"Error embedding subtitles in video: {e}") from e
        return output_video
    if output_format == "Word":
        word_file = f"{video_name}.docx"
        write_word(result, word_file, tokenizer, translation_model)
        return word_file
    if output_format == "PDF":
        pdf_file = f"{video_name}.pdf"
        write_pdf(result, pdf_file, tokenizer, translation_model)
        return pdf_file
    if output_format == "PowerPoint":
        ppt_file = f"{video_name}.pptx"
        write_ppt(result, ppt_file, tokenizer, translation_model)
        return ppt_file
    raise ValueError(f"Unsupported output format: {output_format}")

# Gradio interface: one file upload, two language selectors and an output
# format selector feed transcribe_video(); the result is offered as a download.
iface = gr.Interface(
    title="Video Subtitle Generator with Translation & Multi-Format Output",
    description=(
        "This tool allows you to generate subtitles from a video file using Whisper, "
        "translate the subtitles into multiple languages using M2M100, and export them "
        "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
    ),
    fn=transcribe_video,
    inputs=[
        gr.File(label="Upload Video File"),
        gr.Dropdown(
            choices=["en", "es", "fr", "de", "it", "pt"],
            value="en",
            label="Select Original Video Language",
        ),
        gr.Dropdown(
            choices=["en", "fa", "es", "fr"],
            value="fa",
            label="Select Subtitle Translation Language",
        ),
        gr.Radio(
            choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"],
            value="Video with Hardsub",
            label="Choose Output Format",
        ),
    ],
    outputs=gr.File(label="Download File"),
    theme="compact",
    live=False,  # No live interaction needed
)

# Launch the interface only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    iface.launch()