younes21000 committed on
Commit
fe11376
1 Parent(s): 7e7b0c1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +307 -0
app.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ import os
4
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
+ from docx import Document
6
+ from reportlab.pdfgen import canvas
7
+ from reportlab.pdfbase.ttfonts import TTFont
8
+ from reportlab.pdfbase import pdfmetrics
9
+ from reportlab.lib.pagesizes import A4
10
+ import arabic_reshaper
11
+ from bidi.algorithm import get_display
12
+ from pptx import Presentation
13
+ import subprocess
14
+ import shlex
15
+ import yt_dlp
16
+
17
# Whisper speech-to-text model, loaded once at import time and shared by every
# request. The "tiny" checkpoint is a small model chosen for faster transcription.
model = whisper.load_model(name="tiny")
19
+
20
+ # Load M2M100 translation model for different languages
21
# Hugging Face checkpoint used for every translation direction.
_M2M100_CHECKPOINT = "facebook/m2m100_418M"

# M2M100 language codes this app accepts as translation targets:
# Persian, Spanish, French, German, Italian, Portuguese, Arabic, Chinese,
# Hindi, Japanese, Korean and Russian.
_SUPPORTED_TARGET_LANGUAGES = frozenset(
    {"fa", "es", "fr", "de", "it", "pt", "ar", "zh", "hi", "ja", "ko", "ru"}
)

# Loaded model weights keyed by checkpoint name, so the weights are only
# loaded once per process instead of once per request.
_TRANSLATION_MODEL_CACHE = {}


def load_translation_model(target_language, source_language="en"):
    """Return an M2M100 ``(tokenizer, model)`` pair configured for one direction.

    Args:
        target_language: Language code to translate into. Must be one of the
            codes in ``_SUPPORTED_TARGET_LANGUAGES``.
        source_language: Language code of the text that will be translated.
            Defaults to ``"en"``, which is what this function always used.

    Returns:
        A ``(tokenizer, translation_model)`` tuple. The tokenizer has
        ``src_lang`` and ``tgt_lang`` set; ``translate_text`` reads
        ``tgt_lang`` back from it.

    Raises:
        ValueError: If ``target_language`` is not a supported code.
    """
    if target_language not in _SUPPORTED_TARGET_LANGUAGES:
        raise ValueError(f"Translation model for {target_language} not supported")

    # A fresh tokenizer per call: it carries the mutable src/tgt language
    # state, so it must not be shared between requests with different targets.
    tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
    tokenizer.src_lang = source_language
    tokenizer.tgt_lang = target_language

    # The weights are identical for every language pair, so reuse them.
    translation_model = _TRANSLATION_MODEL_CACHE.get(_M2M100_CHECKPOINT)
    if translation_model is None:
        translation_model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
        _TRANSLATION_MODEL_CACHE[_M2M100_CHECKPOINT] = translation_model

    return tokenizer, translation_model
47
+
48
def translate_text(text, tokenizer, model):
    """Translate one piece of text with an M2M100 tokenizer/model pair.

    Args:
        text: Source text. Empty or whitespace-only strings are returned
            unchanged without calling the model, since there is nothing to
            translate.
        tokenizer: Tokenizer whose ``tgt_lang`` attribute names the target
            language (see ``load_translation_model``).
        model: Generation model matching the tokenizer.

    Returns:
        The decoded translation of the first generated sequence.

    Raises:
        RuntimeError: If tokenization, generation or decoding fails. The
            original exception is attached as ``__cause__``.
    """
    if isinstance(text, str) and not text.strip():
        return text

    try:
        # NOTE: truncation=True silently drops input beyond the model's
        # maximum length; subtitle segments are expected to be short.
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        target_token_id = tokenizer.get_lang_id(tokenizer.tgt_lang)
        translated = model.generate(**inputs, forced_bos_token_id=target_token_id)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error during translation: {e}") from e
55
+
56
+ # Helper function to format timestamps in SRT format
57
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The offset is rounded to the nearest millisecond once, up front, and all
    fields are derived from that integer. Truncating the fractional part
    instead turns values such as 2.3 into ``,299`` because of float error.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero, as SRT has no negative timestamps.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
64
+
65
+ # Corrected write_srt function
66
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the segments of a transcription to an SRT subtitle file.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'start'``, ``'end'`` (seconds) and ``'text'``.
        output_file: Path of the SRT file to create or overwrite.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When given, every segment's text is translated
            before being written.
    """
    # Always write UTF-8: the locale's default encoding may be unable to
    # represent translated text such as Persian or Arabic.
    with open(output_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(transcription["segments"], start=1):
            text = segment["text"]
            if translation_model is not None:
                text = translate_text(text, tokenizer, translation_model)

            start_time = format_timestamp(segment["start"])
            end_time = format_timestamp(segment["end"])

            # One SRT cue: counter, time range, text, blank separator line.
            f.write(f"{index}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text.strip()}\n\n")
82
+
83
+ # Embedding subtitles into video (hardsub)
84
+ def embed_hardsub_in_video(video_file, srt_file, output_video):
85
+ command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
86
+ try:
87
+ process = subprocess.run(shlex.split(command), capture_output=True, text=True, timeout=300)
88
+ if process.returncode != 0:
89
+ raise RuntimeError(f"ffmpeg error: {process.stderr}")
90
+ except subprocess.TimeoutExpired:
91
+ raise RuntimeError("ffmpeg process timed out.")
92
+ except Exception as e:
93
+ raise RuntimeError(f"Error running ffmpeg: {e}")
94
+
95
+ # Helper function to write Word documents
96
def _mark_paragraph_rtl(paragraph):
    """Mark a python-docx paragraph and all of its runs as right-to-left.

    python-docx exposes no paragraph-level RTL property, so the ``w:bidi``
    element is added to the paragraph properties directly. Each run is
    additionally flagged through ``Font.rtl``.
    """
    from docx.oxml import OxmlElement

    paragraph_properties = paragraph._p.get_or_add_pPr()
    paragraph_properties.append(OxmlElement("w:bidi"))
    for run in paragraph.runs:
        run.font.rtl = True


def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write the transcription as a numbered list of paragraphs in a .docx file.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the Word document to create.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When given, every segment's text is translated
            before being written.
        target_language: Language code of the written text. Persian
            (``"fa"``) and Arabic (``"ar"``) paragraphs are marked
            right-to-left.
    """
    doc = Document()
    rtl = target_language in ("fa", "ar")

    for index, segment in enumerate(transcription["segments"], start=1):
        text = segment["text"]
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)

        para = doc.add_paragraph(f"{index}. {text.strip()}")
        if rtl:
            _mark_paragraph_rtl(para)

    doc.save(output_file)
107
+
108
+ # Helper function to reverse text for RTL
109
def reverse_text_for_rtl(text):
    """Reverse the characters of each whitespace-separated word in *text*.

    Word order is kept; runs of whitespace collapse to single spaces because
    the words are re-joined with one space.
    """
    flipped_words = map(lambda word: "".join(reversed(word)), text.split())
    return " ".join(flipped_words)
111
+
112
+ # Helper function to write PDF documents
113
def _register_pdf_font(font_name, font_path):
    """Register the TrueType font at *font_path* with reportlab as *font_name*.

    Raises:
        FileNotFoundError: If the font file does not exist.
        RuntimeError: If reportlab fails to load the font file.
    """
    if not os.path.exists(font_path):
        raise FileNotFoundError(
            f"{font_name} font file not found at {font_path}. Please ensure it is available."
        )
    try:
        pdfmetrics.registerFont(TTFont(font_name, font_path))
    except Exception as e:
        raise RuntimeError(f"Error registering {font_name} font: {e}.") from e


def _wrap_pdf_text(text, font_name, font_size, max_width):
    """Greedily split *text* on whitespace into lines at most *max_width* points wide.

    A single word wider than *max_width* is kept on a line of its own.
    Always returns at least one line.
    """
    lines = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}" if current else word
        if current and pdfmetrics.stringWidth(candidate, font_name, font_size) > max_width:
            lines.append(current)
            current = word
        else:
            current = candidate
    if current:
        lines.append(current)
    return lines or [""]


def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription as numbered lines in an A4 PDF.

    Persian and Arabic output (detected from ``tokenizer.tgt_lang`` when a
    translation model is given) is reshaped, bidi-reordered, drawn
    right-aligned and set in B-Nazanin; everything else is drawn left-aligned
    in Arial. Only the font that is actually used has to be present next to
    this file. Lines wider than the printable area are wrapped, and a new
    page is started before a line would fall into the bottom margin.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the PDF to create.
        tokenizer: Tokenizer passed to ``translate_text``; its ``tgt_lang``
            selects the font and text direction.
        translation_model: When given, every segment's text is translated
            before being written.

    Returns:
        ``output_file``.

    Raises:
        FileNotFoundError: If the required font file is missing.
        RuntimeError: If the font cannot be registered.
    """
    font_size = 12
    margin = 50  # points kept free on every side of the page
    line_height = 20
    page_width, page_height = A4

    c = canvas.Canvas(output_file, pagesize=A4)

    # Language, direction and font are the same for every segment, so they
    # are resolved once instead of inside the loop.
    translating = translation_model is not None
    target_language = tokenizer.tgt_lang if translating else None
    rtl = target_language in ("fa", "ar")

    # Font files are expected in the same directory as this module.
    app_dir = os.path.dirname(os.path.abspath(__file__))
    if rtl:
        font_name, font_file = "B-Nazanin", "B-NAZANIN.TTF"
    else:
        font_name, font_file = "Arial", "Arial.ttf"
    _register_pdf_font(font_name, os.path.join(app_dir, font_file))

    c.setFont(font_name, font_size)
    y_position = page_height - margin

    for index, segment in enumerate(transcription["segments"], start=1):
        text = segment["text"]
        if translating:
            text = translate_text(text, tokenizer, translation_model)

        line = f"{index}. {text.strip()}"
        if rtl:
            # Join the letters into their contextual forms before measuring,
            # so wrapping uses the widths of the glyphs actually drawn.
            line = arabic_reshaper.reshape(line)

        for wrapped_line in _wrap_pdf_text(line, font_name, font_size, page_width - 2 * margin):
            # Break the page BEFORE drawing, so nothing lands in the margin.
            if y_position < margin:
                c.showPage()
                # showPage() resets the canvas state, including the font.
                c.setFont(font_name, font_size)
                y_position = page_height - margin

            if rtl:
                # Reorder each wrapped line separately for visual RTL display.
                c.drawRightString(page_width - margin, y_position, get_display(wrapped_line))
            else:
                c.drawString(margin, y_position, wrapped_line)

            y_position -= line_height

    c.save()
    return output_file
185
+
186
+
187
+
188
+
189
+ # Helper function to write PowerPoint slides
190
def _add_transcription_slide(ppt, text):
    """Append a slide titled "Transcription" whose body text box holds *text*.

    The text box is placed below the title area with a small margin and has
    word wrap enabled so long lines stay inside the slide.
    """
    slide = ppt.slides.add_slide(ppt.slide_layouts[5])
    slide.shapes.title.text = "Transcription"

    side_margin = ppt.slide_width // 20
    top = ppt.slide_height // 5  # leave the top fifth of the slide to the title
    textbox = slide.shapes.add_textbox(
        left=side_margin,
        top=top,
        width=ppt.slide_width - 2 * side_margin,
        height=ppt.slide_height - top - side_margin,
    )
    textbox.text_frame.word_wrap = True
    textbox.text_frame.text = text.strip()
    return slide


def write_ppt(transcription, output_file, tokenizer=None, translation_model=None, max_chars_per_slide=400):
    """Write the transcription as numbered lines spread over PowerPoint slides.

    Lines are accumulated until adding the next one would exceed
    ``max_chars_per_slide``; the accumulated text then becomes one slide. A
    single line longer than the limit gets a slide of its own. With no
    segments at all, one slide with an empty body is still produced.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the .pptx file to create.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When given, every segment's text is translated
            before being written.
        max_chars_per_slide: Character budget per slide. Defaults to 400.
    """
    ppt = Presentation()
    text_buffer = ""
    slide_count = 0

    for index, segment in enumerate(transcription["segments"], start=1):
        text = segment["text"]
        if translation_model is not None:
            text = translate_text(text, tokenizer, translation_model)

        line = f"{index}. {text.strip()}\n"

        # Flush only a non-empty buffer; otherwise an over-long first line
        # would produce an empty slide.
        if text_buffer and len(text_buffer) + len(line) > max_chars_per_slide:
            _add_transcription_slide(ppt, text_buffer)
            slide_count += 1
            text_buffer = line
        else:
            text_buffer += line

    # Emit the remaining text, or a single empty slide for an empty transcript.
    if text_buffer or slide_count == 0:
        _add_transcription_slide(ppt, text_buffer)

    ppt.save(output_file)
227
+
228
+
229
+ # Function to download YouTube video
230
def download_youtube_video(url):
    """Download the video at *url* as MP4 with yt-dlp and return its local path.

    Every call downloads into its own fresh temporary directory. A fixed
    output name in the working directory made yt-dlp skip the download when
    the file already existed (returning the previous video) and let
    concurrent requests overwrite each other.

    Args:
        url: Address of the video to download.

    Returns:
        Absolute path of the downloaded ``downloaded_video.mp4``. The
        temporary directory is not removed by this function.
    """
    import tempfile

    download_dir = tempfile.mkdtemp(prefix="yt_download_")
    output_path = os.path.join(download_dir, "downloaded_video.mp4")

    ydl_opts = {
        "format": "mp4",
        "outtmpl": output_path,
        # NOTE(review): this disables TLS certificate verification, which
        # allows man-in-the-middle tampering with the download. Kept because
        # it was set deliberately; remove it once the host's CA bundle works.
        "nocheckcertificate": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return output_path
239
+
240
+
241
+ # Transcribing video and generating output
242
# Output formats accepted by transcribe_video.
_OUTPUT_FORMATS = ("SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint")


def transcribe_video(video_file, video_url, language, target_language, output_format):
    """Transcribe a video, optionally translate it, and export one output file.

    Translation strategy:
      * target equals source: no translation.
      * target is English: Whisper's own ``task="translate"`` is used, since
        the M2M100 loader does not accept English as a target.
      * otherwise: M2M100, with the tokenizer's source language set to the
        video's language.

    Args:
        video_file: Uploaded file (an object with a ``name`` path attribute,
            or a plain path string), or ``None`` when a URL is used.
        video_url: Video URL; takes precedence over ``video_file`` when
            non-blank.
        language: Language code spoken in the video.
        target_language: Language code for the generated text.
        output_format: One of ``"SRT"``, ``"Video with Hardsub"``,
            ``"Word"``, ``"PDF"`` or ``"PowerPoint"``.

    Returns:
        Path of the generated file.

    Raises:
        ValueError: If the output format is unknown, or neither a file nor a
            URL was supplied.
        RuntimeError: If loading the translation model or embedding the
            subtitles fails.
    """
    # Validate cheap things first, before any download or transcription.
    if output_format not in _OUTPUT_FORMATS:
        raise ValueError(f"Unsupported output format: {output_format!r}")

    video_url = (video_url or "").strip()
    if video_url:
        video_file_path = download_youtube_video(video_url)
    elif video_file is not None:
        # The upload may arrive as a file-like object or as a path string.
        video_file_path = getattr(video_file, "name", video_file)
    else:
        raise ValueError("Please upload a video file or provide a YouTube URL.")

    tokenizer, translation_model = None, None
    task = "transcribe"
    if target_language != language:
        if target_language == "en":
            task = "translate"
        else:
            try:
                tokenizer, translation_model = load_translation_model(target_language)
                if language:
                    # The segments are in the video's language, not English.
                    tokenizer.src_lang = language
            except Exception as e:
                raise RuntimeError(f"Error loading translation model: {e}") from e

    result = model.transcribe(video_file_path, language=language, task=task)
    video_name = os.path.splitext(video_file_path)[0]

    # The SRT file is only needed for these two formats; writing it for the
    # others would translate every segment a second time for nothing.
    if output_format in ("SRT", "Video with Hardsub"):
        srt_file = f"{video_name}.srt"
        write_srt(result, srt_file, tokenizer, translation_model)
        if output_format == "SRT":
            return srt_file

        output_video = f"{video_name}_with_subtitles.mp4"
        try:
            embed_hardsub_in_video(video_file_path, srt_file, output_video)
        except Exception as e:
            raise RuntimeError(f"Error embedding subtitles in video: {e}") from e
        return output_video

    if output_format == "Word":
        word_file = f"{video_name}.docx"
        write_word(result, word_file, tokenizer, translation_model, target_language)
        return word_file

    if output_format == "PDF":
        pdf_file = f"{video_name}.pdf"
        write_pdf(result, pdf_file, tokenizer, translation_model)
        return pdf_file

    # Only "PowerPoint" remains after the validation above.
    ppt_file = f"{video_name}.pptx"
    write_ppt(result, ppt_file, tokenizer, translation_model)
    return ppt_file
282
+
283
+ # Gradio interface with YouTube URL
284
# Single-function Gradio UI: the five inputs map, in order, onto the
# parameters of transcribe_video, and the returned path is offered as a download.
iface = gr.Interface(
    fn=transcribe_video,
    inputs=[
        gr.File(label="Upload Video File (or leave empty for YouTube link)"),
        gr.Textbox(label="YouTube Video URL (optional)", placeholder="https://www.youtube.com/watch?v=..."),
        gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
        # "ar" is offered because both the translation loader and the PDF
        # writer already handle Arabic.
        gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "ar", "es", "de", "fr", "it", "pt"], value="fa"),
        gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub"),
    ],
    outputs=gr.File(label="Download File"),
    title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
    description=(
        "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, "
        "translate the subtitles into multiple languages using M2M100, and export them "
        "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
    ),
    theme="compact",
    live=False,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
306
+
307
+