younes21000 committed
Commit df147cf
1 Parent(s): 9cec946

Upload 2 files


From Younes

Files changed (2)
  1. app (1).py +186 -0
  2. requirements (1).txt +9 -0
app (1).py ADDED
@@ -0,0 +1,186 @@
+ import gradio as gr
+ import whisper
+ import os
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+ from docx import Document  # For Word output
+ from fpdf import FPDF  # For PDF output
+ from pptx import Presentation  # For PowerPoint output
+ import subprocess  # To use ffmpeg for embedding subtitles
+ import shlex  # For better command-line argument handling
+
+ # Load the Whisper model
+ model = whisper.load_model("tiny")  # Smaller model for faster transcription
+
+ # Load M2M100 translation model for different languages
+ def load_translation_model(target_language):
+     lang_codes = {
+         "fa": "fa",  # Persian (Farsi)
+         "es": "es",  # Spanish
+         "fr": "fr",  # French
+     }
+     target_lang_code = lang_codes.get(target_language)
+     if not target_lang_code:
+         raise ValueError(f"Translation model for {target_language} not supported")
+
+     # Load M2M100 model and tokenizer
+     tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
+     translation_model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
+
+     tokenizer.src_lang = "en"
+     tokenizer.tgt_lang = target_lang_code
+
+     return tokenizer, translation_model
+
+ def translate_text(text, tokenizer, model):
+     try:
+         inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+         translated = model.generate(**inputs, forced_bos_token_id=tokenizer.get_lang_id(tokenizer.tgt_lang))
+         return tokenizer.decode(translated[0], skip_special_tokens=True)
+     except Exception as e:
+         raise RuntimeError(f"Error during translation: {e}")
+
+ # Helper function to format timestamps in SRT format (hh:mm:ss,ms)
+ def format_timestamp(seconds):
+     milliseconds = int((seconds % 1) * 1000)
+     seconds = int(seconds)
+     hours = seconds // 3600
+     minutes = (seconds % 3600) // 60
+     seconds = seconds % 60
+     return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
+
+ # Write the transcription to an SRT file, translating each segment if a translation model is provided
+ def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
+     with open(output_file, "w", encoding="utf-8") as f:  # utf-8 so non-Latin scripts (e.g. Persian) are written correctly
+         for i, segment in enumerate(transcription['segments']):
+             start = segment['start']
+             end = segment['end']
+             text = segment['text']
+
+             if translation_model:
+                 text = translate_text(text, tokenizer, translation_model)
+
+             start_time = format_timestamp(start)
+             end_time = format_timestamp(end)
+
+             f.write(f"{i + 1}\n")
+             f.write(f"{start_time} --> {end_time}\n")
+             f.write(f"{text.strip()}\n\n")
+
+ def embed_hardsub_in_video(video_file, srt_file, output_video):
+     """Uses ffmpeg to burn subtitles into the video (hardsub)."""
+     command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
+
+     try:
+         print(f"Running command: {command}")  # Debug statement
+         process = subprocess.run(shlex.split(command), capture_output=True, text=True, timeout=300)
+         print(f"ffmpeg output: {process.stdout}")  # Debug statement
+         if process.returncode != 0:
+             raise RuntimeError(f"ffmpeg error: {process.stderr}")
+     except subprocess.TimeoutExpired:
+         raise RuntimeError("ffmpeg process timed out.")
+     except Exception as e:
+         raise RuntimeError(f"Error running ffmpeg: {e}")
+
+ def write_word(transcription, output_file, tokenizer=None, translation_model=None):
+     """Creates a Word document from the transcription without timestamps."""
+     doc = Document()
+     for i, segment in enumerate(transcription['segments']):
+         text = segment['text']
+
+         if translation_model:
+             text = translate_text(text, tokenizer, translation_model)
+
+         doc.add_paragraph(f"{i + 1}. {text.strip()}")
+     doc.save(output_file)
+
+ def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
+     """Creates a PDF document from the transcription without timestamps."""
+     pdf = FPDF()
+     pdf.set_auto_page_break(auto=True, margin=15)
+     pdf.add_page()
+     pdf.set_font("Arial", size=12)  # Note: FPDF's built-in fonts are Latin-1 only; non-Latin scripts need a Unicode TTF via add_font()
+
+     for i, segment in enumerate(transcription['segments']):
+         text = segment['text']
+
+         if translation_model:
+             text = translate_text(text, tokenizer, translation_model)
+
+         pdf.multi_cell(0, 10, f"{i + 1}. {text.strip()}")
+
+     pdf.output(output_file)
+
+ def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
+     """Creates a PowerPoint presentation from the transcription without timestamps."""
+     ppt = Presentation()
+
+     for i, segment in enumerate(transcription['segments']):
+         text = segment['text']
+
+         if translation_model:
+             text = translate_text(text, tokenizer, translation_model)
+
+         slide = ppt.slides.add_slide(ppt.slide_layouts[5])  # "Title Only" layout
+         title = slide.shapes.title
+         title.text = f"{i + 1}. {text.strip()}"
+
+     ppt.save(output_file)
+
+
+ def transcribe_video(video_file, language, target_language, output_format):
+     # Transcribe the video with Whisper
+     result = model.transcribe(video_file.name, language=language)
+     video_name = os.path.splitext(video_file.name)[0]
+
+     # Load the translation model for the selected subtitle language
+     if target_language != "en":
+         try:
+             tokenizer, translation_model = load_translation_model(target_language)
+         except Exception as e:
+             raise RuntimeError(f"Error loading translation model: {e}")
+     else:
+         tokenizer, translation_model = None, None
+
+     # Save the SRT file
+     srt_file = f"{video_name}.srt"
+     write_srt(result, srt_file, tokenizer, translation_model)
+
+     # Output based on the user's selection
+     if output_format == "SRT":
+         return srt_file
+     elif output_format == "Video with Hardsub":
+         output_video = f"{video_name}_with_subtitles.mp4"
+         try:
+             embed_hardsub_in_video(video_file.name, srt_file, output_video)
+             return output_video
+         except Exception as e:
+             raise RuntimeError(f"Error embedding subtitles in video: {e}")
+     elif output_format == "Word":
+         word_file = f"{video_name}.docx"
+         write_word(result, word_file, tokenizer, translation_model)
+         return word_file
+     elif output_format == "PDF":
+         pdf_file = f"{video_name}.pdf"
+         write_pdf(result, pdf_file, tokenizer, translation_model)
+         return pdf_file
+     elif output_format == "PowerPoint":
+         ppt_file = f"{video_name}.pptx"
+         write_ppt(result, ppt_file, tokenizer, translation_model)
+         return ppt_file
+
+ # Gradio interface
+ iface = gr.Interface(
+     fn=transcribe_video,
+     inputs=[
+         gr.File(label="Upload Video"),
+         gr.Dropdown(label="Select Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
+         gr.Dropdown(label="Select Subtitle Language", choices=["en", "fa", "es", "fr"], value="fa"),
+         gr.Radio(label="Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
+     ],
+     outputs=gr.File(label="Download Subtitles, Video, or Document"),
+     title="Video Subtitle Generator with Hardsub and Document Formats",
+     description="Upload a video file to generate subtitles in SRT format, download the video with hardsubbed subtitles, or generate Word, PDF, or PowerPoint documents using Whisper and M2M100 for translation."
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
requirements (1).txt ADDED
@@ -0,0 +1,9 @@
+ transformers>=4.30.0
+ gradio>=3.16.0
+ ffmpeg-python
+ python-docx
+ fpdf
+ python-pptx
+ sentencepiece  # Required for M2M100 and MarianMT translation models
+ librosa  # Required for audio processing
+ git+https://github.com/openai/whisper.git