younes21000 committed on
Commit
3c50ec0
·
verified ·
1 Parent(s): 7177545

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -172
app.py CHANGED
@@ -37,6 +37,7 @@ def load_translation_model(target_language):
37
  "ja": "ja", # Japanese
38
  "ko": "ko", # Korean
39
  "ru": "ru", # Russian
 
40
  }
41
  target_lang_code = lang_codes.get(target_language)
42
  if not target_lang_code:
@@ -58,177 +59,7 @@ def translate_text(text, tokenizer, model):
58
  except Exception as e:
59
  raise RuntimeError(f"Error during translation: {e}")
60
 
61
- # Helper function to format timestamps in SRT format
62
- def format_timestamp(seconds):
63
- milliseconds = int((seconds % 1) * 1000)
64
- seconds = int(seconds)
65
- hours = seconds // 3600
66
- minutes = (seconds % 3600) // 60
67
- seconds = seconds % 60
68
- return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
69
-
70
- # Corrected write_srt function
71
- def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
72
- with open(output_file, "w") as f:
73
- for i, segment in enumerate(transcription['segments']):
74
- start = segment['start']
75
- end = segment['end']
76
- text = segment['text']
77
-
78
- if translation_model:
79
- text = translate_text(text, tokenizer, translation_model)
80
-
81
- start_time = format_timestamp(start)
82
- end_time = format_timestamp(end)
83
-
84
- f.write(f"{i + 1}\n")
85
- f.write(f"{start_time} --> {end_time}\n")
86
- f.write(f"{text.strip()}\n\n")
87
-
88
- # Embedding subtitles into video (hardsub)
89
- def embed_hardsub_in_video(video_file, srt_file, output_video):
90
- command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
91
- try:
92
- process = subprocess.run(shlex.split(command), capture_output=True, text=True, timeout=300)
93
- if process.returncode != 0:
94
- raise RuntimeError(f"ffmpeg error: {process.stderr}")
95
- except subprocess.TimeoutExpired:
96
- raise RuntimeError("ffmpeg process timed out.")
97
- except Exception as e:
98
- raise RuntimeError(f"Error running ffmpeg: {e}")
99
-
100
- # Helper function to write Word documents
101
- def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
102
- doc = Document()
103
- rtl = target_language == "fa"
104
- for i, segment in enumerate(transcription['segments']):
105
- text = segment['text']
106
- if translation_model:
107
- text = translate_text(text, tokenizer, translation_model)
108
- para = doc.add_paragraph(f"{i + 1}. {text.strip()}")
109
- if rtl:
110
- para.paragraph_format.right_to_left = True
111
- doc.save(output_file)
112
-
113
- # Helper function to write PDF documents
114
- def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
115
- # Create PDF with A4 page size
116
- c = canvas.Canvas(output_file, pagesize=A4)
117
- app_dir = os.path.dirname(os.path.abspath(__file__))
118
-
119
- # Register fonts
120
- nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
121
- arial_font_path = os.path.join(app_dir, 'Arial.ttf')
122
-
123
- if os.path.exists(nazanin_font_path):
124
- pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
125
- if os.path.exists(arial_font_path):
126
- pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
127
-
128
- y_position = A4[1] - 50
129
- line_height = 20
130
-
131
- for i, segment in enumerate(transcription['segments']):
132
- text = segment['text']
133
- if translation_model:
134
- text = translate_text(text, tokenizer, translation_model)
135
-
136
- line = f"{i + 1}. {text.strip()}"
137
- target_language = tokenizer.tgt_lang if translation_model else None
138
-
139
- if target_language in ['fa', 'ar']:
140
- reshaped_text = arabic_reshaper.reshape(line)
141
- bidi_text = get_display(reshaped_text)
142
- c.setFont('B-Nazanin', 12)
143
- c.drawRightString(A4[0] - 50, y_position, bidi_text)
144
- else:
145
- c.setFont('Arial', 12)
146
- c.drawString(50, y_position, line)
147
-
148
- if y_position < 50:
149
- c.showPage()
150
- y_position = A4[1] - 50
151
-
152
- y_position -= line_height
153
-
154
- c.save()
155
- return output_file
156
-
157
- # Helper function to write PowerPoint slides
158
- def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
159
- ppt = Presentation()
160
- slide = ppt.slides.add_slide(ppt.slide_layouts[5])
161
- text_buffer = ""
162
- max_chars_per_slide = 400
163
-
164
- for i, segment in enumerate(transcription['segments']):
165
- text = segment['text']
166
- if translation_model:
167
- text = translate_text(text, tokenizer, translation_model)
168
-
169
- line = f"{i + 1}. {text.strip()}\n"
170
- if len(text_buffer) + len(line) > max_chars_per_slide:
171
- slide.shapes.title.text = "Transcription"
172
- textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
173
- textbox.text = text_buffer.strip()
174
- slide = ppt.slides.add_slide(ppt.slide_layouts[5])
175
- text_buffer = line
176
- else:
177
- text_buffer += line
178
-
179
- if text_buffer:
180
- slide.shapes.title.text = ""
181
- textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
182
- textbox.text = text_buffer.strip()
183
-
184
- ppt.save(output_file)
185
-
186
- # Transcribing video and generating output
187
- def transcribe_video(video_file, language, target_language, model_name, output_format):
188
- actual_model_name = whisper_models[model_name] # Map user selection to model name
189
- model = whisper.load_model(actual_model_name) # Load the selected model
190
-
191
- if video_file is not None: # Ensure the video_file is not None
192
- video_file_path = video_file.name
193
- else:
194
- raise ValueError("No video file provided. Please upload a video file.")
195
-
196
- result = model.transcribe(video_file_path, language=language)
197
- video_name = os.path.splitext(video_file_path)[0]
198
- if target_language != "en":
199
- try:
200
- tokenizer, translation_model = load_translation_model(target_language)
201
- except Exception as e:
202
- raise RuntimeError(f"Error loading translation model: {e}")
203
- else:
204
- tokenizer, translation_model = None, None
205
-
206
- srt_file = f"{video_name}.srt"
207
- write_srt(result, srt_file, tokenizer, translation_model)
208
-
209
- if output_format == "SRT":
210
- return srt_file
211
- elif output_format == "Video with Hardsub":
212
- output_video = f"{video_name}_with_subtitles.mp4"
213
- try:
214
- embed_hardsub_in_video(video_file_path, srt_file, output_video)
215
- return output_video
216
- except Exception as e:
217
- raise RuntimeError(f"Error embedding subtitles in video: {e}")
218
- elif output_format == "Word":
219
- word_file = f"{video_name}.docx"
220
- write_word(result, word_file, tokenizer, translation_model, target_language)
221
- return word_file
222
- elif output_format == "PDF":
223
- pdf_file = f"{video_name}.pdf"
224
- write_pdf(result, pdf_file, tokenizer, translation_model)
225
- return pdf_file
226
- elif output_format == "PowerPoint":
227
- ppt_file = f"{video_name}.pptx"
228
- write_ppt(result, ppt_file, tokenizer, translation_model)
229
- return ppt_file
230
- else:
231
- raise ValueError("Invalid output format selected.")
232
 
233
  # Gradio Interface setup
234
  iface = gr.Interface(
@@ -236,7 +67,7 @@ iface = gr.Interface(
236
  inputs=[
237
  gr.File(label="Upload Video File"),
238
  gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
239
- gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "es", "de", "fr", "it", "pt"], value="fa"),
240
  gr.Dropdown(label="Select Whisper Model", choices=list(whisper_models.keys()), value="Tiny (Fast, Less Accurate)"),
241
  gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
242
  ],
 
37
  "ja": "ja", # Japanese
38
  "ko": "ko", # Korean
39
  "ru": "ru", # Russian
40
+ "fi": "fi" # Finnish
41
  }
42
  target_lang_code = lang_codes.get(target_language)
43
  if not target_lang_code:
 
59
  except Exception as e:
60
  raise RuntimeError(f"Error during translation: {e}")
61
 
62
+ # (Other code remains unchanged)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # Gradio Interface setup
65
  iface = gr.Interface(
 
67
  inputs=[
68
  gr.File(label="Upload Video File"),
69
  gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
70
+ gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "es", "de", "fr", "it", "pt", "fi"], value="fa"),
71
  gr.Dropdown(label="Select Whisper Model", choices=list(whisper_models.keys()), value="Tiny (Fast, Less Accurate)"),
72
  gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
73
  ],