younes21000 committed on
Commit
3a2791b
1 Parent(s): fe11376

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -114
app.py CHANGED
@@ -12,7 +12,6 @@ from bidi.algorithm import get_display
12
  from pptx import Presentation
13
  import subprocess
14
  import shlex
15
- import yt_dlp
16
 
17
  # Load the Whisper model (smaller model for faster transcription)
18
  model = whisper.load_model("tiny")
@@ -105,153 +104,87 @@ def write_word(transcription, output_file, tokenizer=None, translation_model=Non
105
  para.paragraph_format.right_to_left = True
106
  doc.save(output_file)
107
 
108
- # Helper function to reverse text for RTL
109
- def reverse_text_for_rtl(text):
110
- return ' '.join([word[::-1] for word in text.split()])
111
-
112
  # Helper function to write PDF documents
113
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
114
  # Create PDF with A4 page size
115
  c = canvas.Canvas(output_file, pagesize=A4)
116
- # Get the directory where app.py is located
117
  app_dir = os.path.dirname(os.path.abspath(__file__))
118
 
119
- # Construct the full path to the font files
120
  nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
121
  arial_font_path = os.path.join(app_dir, 'Arial.ttf')
122
 
123
- # Register B-Nazanin font
124
  if os.path.exists(nazanin_font_path):
125
- try:
126
- pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
127
- except Exception as e:
128
- raise RuntimeError(f"Error registering B-Nazanin font: {e}.")
129
- else:
130
- raise FileNotFoundError(f"B-Nazanin font file not found at {nazanin_font_path}. Please ensure it is available.")
131
-
132
- # Register Arial font
133
  if os.path.exists(arial_font_path):
134
- try:
135
- pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
136
- except Exception as e:
137
- raise RuntimeError(f"Error registering Arial font: {e}.")
138
- else:
139
- raise FileNotFoundError(f"Arial font file not found at {arial_font_path}. Please ensure it is available.")
140
 
141
- # Initialize y position from top of page
142
- y_position = A4[1] - 50 # Start 50 points from top
143
  line_height = 20
144
 
145
- # Process each segment
146
  for i, segment in enumerate(transcription['segments']):
147
  text = segment['text']
148
-
149
- # Translate if translation model is provided
150
  if translation_model:
151
  text = translate_text(text, tokenizer, translation_model)
152
 
153
- # Format the line with segment number
154
  line = f"{i + 1}. {text.strip()}"
 
155
 
156
- # Determine target language for font and text direction
157
- target_language = None
158
- if translation_model:
159
- # Assuming target language can be inferred from the tokenizer
160
- target_language = tokenizer.tgt_lang
161
-
162
- # Reshape and reorder the text for correct RTL display if necessary
163
  if target_language in ['fa', 'ar']:
164
  reshaped_text = arabic_reshaper.reshape(line)
165
  bidi_text = get_display(reshaped_text)
166
- # Set font for RTL languages
167
- c.setFont('B-Nazanin', 12)
168
- # Draw the text right-aligned
169
- c.drawRightString(A4[0] - 50, y_position, bidi_text) # 50 points margin from right
170
  else:
171
- c.setFont('Arial', 12) # Use Arial for other languages
172
- c.drawString(50, y_position, line) # Left aligned
173
 
174
- # Add new page if needed
175
- if y_position < 50: # Leave 50 points margin at bottom
176
  c.showPage()
177
- y_position = A4[1] - 50 # Reset y position for new page
178
 
179
- # Update y position for next line
180
  y_position -= line_height
181
 
182
- # Save the PDF
183
  c.save()
184
  return output_file
185
 
186
-
187
-
188
-
189
  # Helper function to write PowerPoint slides
190
  def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
191
  ppt = Presentation()
192
- slide = ppt.slides.add_slide(ppt.slide_layouts[5]) # Create the first slide
193
- text_buffer = "" # Initialize an empty buffer to accumulate text
194
- max_chars_per_slide = 400 # Set a character limit for each slide
195
 
196
  for i, segment in enumerate(transcription['segments']):
197
  text = segment['text']
198
-
199
- # Translate if translation model is provided
200
  if translation_model:
201
  text = translate_text(text, tokenizer, translation_model)
202
 
203
- # Format the line with segment number
204
  line = f"{i + 1}. {text.strip()}\n"
205
-
206
- # Check if adding this line exceeds the character limit
207
  if len(text_buffer) + len(line) > max_chars_per_slide:
208
- # If so, add the accumulated text to the current slide
209
- slide.shapes.title.text = "Transcription" # Set the title for the slide
210
  textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
211
  textbox.text = text_buffer.strip()
212
-
213
- # Create a new slide and reset the buffer
214
  slide = ppt.slides.add_slide(ppt.slide_layouts[5])
215
- text_buffer = line # Start the new slide with the current line
216
  else:
217
- # Otherwise, keep accumulating text
218
  text_buffer += line
219
 
220
- # Add any remaining text in the buffer to the last slide
221
  if text_buffer:
222
- slide.shapes.title.text = "" # Set the title for the last slide
223
  textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
224
  textbox.text = text_buffer.strip()
225
 
226
  ppt.save(output_file)
227
 
228
-
229
- # Function to download YouTube video
230
- def download_youtube_video(url):
231
- ydl_opts = {
232
- 'format': 'mp4',
233
- 'outtmpl': 'downloaded_video.mp4',
234
- 'nocheckcertificate': True, # Disable certificate check
235
- }
236
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
237
- ydl.download([url])
238
- return 'downloaded_video.mp4'
239
-
240
-
241
  # Transcribing video and generating output
242
  def transcribe_video(video_file, video_url, language, target_language, output_format):
243
- if video_url:
244
- video_file_path = download_youtube_video(video_url)
245
- else:
246
- video_file_path = video_file.name
247
-
248
  result = model.transcribe(video_file_path, language=language)
249
  video_name = os.path.splitext(video_file_path)[0]
 
250
  if target_language != "en":
251
- try:
252
- tokenizer, translation_model = load_translation_model(target_language)
253
- except Exception as e:
254
- raise RuntimeError(f"Error loading translation model: {e}")
255
  else:
256
  tokenizer, translation_model = None, None
257
 
@@ -262,11 +195,8 @@ def transcribe_video(video_file, video_url, language, target_language, output_fo
262
  return srt_file
263
  elif output_format == "Video with Hardsub":
264
  output_video = f"{video_name}_with_subtitles.mp4"
265
- try:
266
- embed_hardsub_in_video(video_file_path, srt_file, output_video)
267
- return output_video
268
- except Exception as e:
269
- raise RuntimeError(f"Error embedding subtitles in video: {e}")
270
  elif output_format == "Word":
271
  word_file = f"{video_name}.docx"
272
  write_word(result, word_file, tokenizer, translation_model, target_language)
@@ -280,28 +210,9 @@ def transcribe_video(video_file, video_url, language, target_language, output_fo
280
  write_ppt(result, ppt_file, tokenizer, translation_model)
281
  return ppt_file
282
 
283
- # Gradio interface with YouTube URL
284
  iface = gr.Interface(
285
  fn=transcribe_video,
286
  inputs=[
287
- gr.File(label="Upload Video File (or leave empty for YouTube link)"), # Removed 'optional=True'
288
- gr.Textbox(label="YouTube Video URL (optional)", placeholder="https://www.youtube.com/watch?v=..."),
289
- gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
290
- gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "es", "de", "fr", "it", "pt"], value="fa"),
291
- gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
292
- ],
293
- outputs=gr.File(label="Download File"),
294
- title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
295
- description=(
296
- "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, "
297
- "translate the subtitles into multiple languages using M2M100, and export them "
298
- "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
299
- ),
300
- theme="compact",
301
- live=False
302
- )
303
-
304
- if __name__ == "__main__":
305
- iface.launch()
306
-
307
-
 
12
  from pptx import Presentation
13
  import subprocess
14
  import shlex
 
15
 
16
  # Load the Whisper model (smaller model for faster transcription)
17
  model = whisper.load_model("tiny")
 
104
  para.paragraph_format.right_to_left = True
105
  doc.save(output_file)
106
 
 
 
 
 
107
  # Helper function to write PDF documents
108
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
109
  # Create PDF with A4 page size
110
  c = canvas.Canvas(output_file, pagesize=A4)
 
111
  app_dir = os.path.dirname(os.path.abspath(__file__))
112
 
113
+ # Register fonts
114
  nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
115
  arial_font_path = os.path.join(app_dir, 'Arial.ttf')
116
 
 
117
  if os.path.exists(nazanin_font_path):
118
+ pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
 
 
 
 
 
 
 
119
  if os.path.exists(arial_font_path):
120
+ pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
 
 
 
 
 
121
 
122
+ y_position = A4[1] - 50
 
123
  line_height = 20
124
 
 
125
  for i, segment in enumerate(transcription['segments']):
126
  text = segment['text']
 
 
127
  if translation_model:
128
  text = translate_text(text, tokenizer, translation_model)
129
 
 
130
  line = f"{i + 1}. {text.strip()}"
131
+ target_language = tokenizer.tgt_lang if translation_model else None
132
 
 
 
 
 
 
 
 
133
  if target_language in ['fa', 'ar']:
134
  reshaped_text = arabic_reshaper.reshape(line)
135
  bidi_text = get_display(reshaped_text)
136
+ c.setFont('B-Nazanin', 12)
137
+ c.drawRightString(A4[0] - 50, y_position, bidi_text)
 
 
138
  else:
139
+ c.setFont('Arial', 12)
140
+ c.drawString(50, y_position, line)
141
 
142
+ if y_position < 50:
 
143
  c.showPage()
144
+ y_position = A4[1] - 50
145
 
 
146
  y_position -= line_height
147
 
 
148
  c.save()
149
  return output_file
150
 
 
 
 
151
  # Helper function to write PowerPoint slides
152
  def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
153
  ppt = Presentation()
154
+ slide = ppt.slides.add_slide(ppt.slide_layouts[5])
155
+ text_buffer = ""
156
+ max_chars_per_slide = 400
157
 
158
  for i, segment in enumerate(transcription['segments']):
159
  text = segment['text']
 
 
160
  if translation_model:
161
  text = translate_text(text, tokenizer, translation_model)
162
 
 
163
  line = f"{i + 1}. {text.strip()}\n"
 
 
164
  if len(text_buffer) + len(line) > max_chars_per_slide:
165
+ slide.shapes.title.text = "Transcription"
 
166
  textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
167
  textbox.text = text_buffer.strip()
 
 
168
  slide = ppt.slides.add_slide(ppt.slide_layouts[5])
169
+ text_buffer = line
170
  else:
 
171
  text_buffer += line
172
 
 
173
  if text_buffer:
174
+ slide.shapes.title.text = ""
175
  textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
176
  textbox.text = text_buffer.strip()
177
 
178
  ppt.save(output_file)
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  # Transcribing video and generating output
181
  def transcribe_video(video_file, video_url, language, target_language, output_format):
182
+ video_file_path = video_file.name
 
 
 
 
183
  result = model.transcribe(video_file_path, language=language)
184
  video_name = os.path.splitext(video_file_path)[0]
185
+
186
  if target_language != "en":
187
+ tokenizer, translation_model = load_translation_model(target_language)
 
 
 
188
  else:
189
  tokenizer, translation_model = None, None
190
 
 
195
  return srt_file
196
  elif output_format == "Video with Hardsub":
197
  output_video = f"{video_name}_with_subtitles.mp4"
198
+ embed_hardsub_in_video(video_file_path, srt_file, output_video)
199
+ return output_video
 
 
 
200
  elif output_format == "Word":
201
  word_file = f"{video_name}.docx"
202
  write_word(result, word_file, tokenizer, translation_model, target_language)
 
210
  write_ppt(result, ppt_file, tokenizer, translation_model)
211
  return ppt_file
212
 
213
+ # Gradio interface without YouTube
214
  iface = gr.Interface(
215
  fn=transcribe_video,
216
  inputs=[
217
+ gr.File(label="Upload Video File"), # Removed YouTube URL input
218
+ gr.Dropdown(label="Select Original Video Language", choices