younes21000 committed on
Commit
ab23cbe
1 Parent(s): 3c50ec0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -1
app.py CHANGED
@@ -59,7 +59,175 @@ def translate_text(text, tokenizer, model):
59
  except Exception as e:
60
  raise RuntimeError(f"Error during translation: {e}")
61
 
62
- # (Other code remains unchanged)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # Gradio Interface setup
65
  iface = gr.Interface(
 
59
  except Exception as e:
60
  raise RuntimeError(f"Error during translation: {e}")
61
 
62
+ # Helper function to format timestamps in SRT format
63
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset from the start of the media in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The formatted timestamp, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    # Convert to whole milliseconds once, with rounding. Truncating the
    # fractional part on its own loses a millisecond to float error
    # (1.001 s would render as ",000") and can never carry into the
    # seconds field (59.9996 s must become 00:01:00,000).
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
70
+
71
+ # Write subtitles in SRT format
72
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription segments to ``output_file`` in SRT format.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'start'``, ``'end'`` and ``'text'``.
        output_file: Path of the SRT file to create (overwritten if present).
        tokenizer: Tokenizer passed through to ``translate_text``; only used
            when ``translation_model`` is given.
        translation_model: When truthy, each segment's text is translated
            with ``translate_text`` before being written.
    """
    # Always write UTF-8: translated subtitles are frequently non-ASCII and
    # the platform default encoding may not be able to represent them.
    with open(output_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(transcription['segments'], start=1):
            text = segment['text']
            if translation_model:
                text = translate_text(text, tokenizer, translation_model)

            start_time = format_timestamp(segment['start'])
            end_time = format_timestamp(segment['end'])

            # One SRT cue: counter, time range, text, blank separator line.
            f.write(f"{index}\n{start_time} --> {end_time}\n{text.strip()}\n\n")
88
+
89
+ # Embedding subtitles into video (hardsub)
90
def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Burn the subtitles from ``srt_file`` into ``video_file`` with ffmpeg.

    The video is re-encoded with libx264 (CRF 23, ``medium`` preset) and
    written to ``output_video``.

    Args:
        video_file: Path of the input video.
        srt_file: Path of the SRT subtitle file to render onto the frames.
        output_video: Path of the video file to produce.

    Raises:
        RuntimeError: If ffmpeg cannot be started, does not finish within
            300 seconds, or exits with a non-zero status.
    """
    # Build the argument vector directly instead of formatting a command
    # string and re-splitting it with shlex: paths containing quotes or
    # backslashes would otherwise be split or unescaped incorrectly.
    command = [
        "ffmpeg",
        "-i", str(video_file),
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        str(output_video),
    ]
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except (OSError, ValueError) as e:
        # e.g. the ffmpeg executable is missing or its output is undecodable.
        raise RuntimeError(f"Error running ffmpeg: {e}") from e

    # Checked outside the try block so this error is not caught and
    # re-wrapped by the handlers above.
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")
100
+
101
+ # Helper function to write Word documents
102
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write the transcription as a numbered list of paragraphs in a .docx file.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'text'``.
        output_file: Path of the Word document to create.
        tokenizer: Tokenizer passed through to ``translate_text``; only used
            when ``translation_model`` is given.
        translation_model: When truthy, each segment's text is translated
            with ``translate_text`` before being written.
        target_language: Language code of the output text. ``"fa"`` and
            ``"ar"`` are laid out right-to-left.
    """
    doc = Document()
    # Same right-to-left language set that write_pdf uses.
    rtl = target_language in ("fa", "ar")
    if rtl:
        # Imported locally so the file's top-level imports stay untouched.
        from docx.enum.text import WD_ALIGN_PARAGRAPH

    for index, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        para = doc.add_paragraph(f"{index}. {text.strip()}")
        if rtl:
            # python-docx documents no ``right_to_left`` property on
            # ParagraphFormat; right-to-left is a run-level font property
            # (``Font.rtl``), so set it on every run and right-align.
            para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
            for run in para.runs:
                run.font.rtl = True
    doc.save(output_file)
113
+
114
+ # Helper function to write PDF documents
115
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription as numbered lines in an A4 PDF.

    ``B-NAZANIN.TTF`` and ``Arial.ttf`` are looked up in the directory of
    this file and registered when present. When the tokenizer's ``tgt_lang``
    is ``'fa'`` or ``'ar'`` each line is reshaped, reordered for display and
    drawn right-aligned; otherwise it is drawn left-aligned.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'text'``.
        output_file: Path of the PDF to create.
        tokenizer: Tokenizer passed through to ``translate_text``; its
            ``tgt_lang`` attribute selects the text direction. Only used
            when ``translation_model`` is given.
        translation_model: When truthy, each segment's text is translated
            with ``translate_text`` before being written.

    Returns:
        ``output_file``.
    """
    c = canvas.Canvas(output_file, pagesize=A4)
    app_dir = os.path.dirname(os.path.abspath(__file__))

    nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
    arial_font_path = os.path.join(app_dir, 'Arial.ttf')

    # Only select fonts that were actually registered; asking the canvas
    # for an unregistered font name fails. Helvetica is one of the fonts
    # built into ReportLab and needs no font file.
    latin_font = 'Helvetica'
    if os.path.exists(arial_font_path):
        pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
        latin_font = 'Arial'
    rtl_font = latin_font
    if os.path.exists(nazanin_font_path):
        pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
        rtl_font = 'B-Nazanin'

    margin = 50
    line_height = 20
    page_top = A4[1] - margin
    y_position = page_top

    # The target language does not change between segments, so read it once.
    target_language = getattr(tokenizer, 'tgt_lang', None) if translation_model else None
    is_rtl = target_language in ['fa', 'ar']

    for index, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        line = f"{index}. {text.strip()}"

        # Break the page BEFORE drawing so no line lands below the bottom
        # margin and every page starts at the same height.
        if y_position < margin:
            c.showPage()
            y_position = page_top

        if is_rtl:
            reshaped_text = arabic_reshaper.reshape(line)
            bidi_text = get_display(reshaped_text)
            c.setFont(rtl_font, 12)
            c.drawRightString(A4[0] - margin, y_position, bidi_text)
        else:
            c.setFont(latin_font, 12)
            c.drawString(margin, y_position, line)

        y_position -= line_height

    c.save()
    return output_file
155
+
156
+ # Helper function to write PowerPoint slides
157
def _add_transcript_slide(ppt, text):
    """Append a slide titled "Transcription" holding ``text`` in a full-slide textbox."""
    slide = ppt.slides.add_slide(ppt.slide_layouts[5])
    slide.shapes.title.text = "Transcription"
    textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
    textbox.text = text


def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription as numbered lines spread over PowerPoint slides.

    Lines are accumulated until adding the next one would exceed 400
    characters, at which point the accumulated text is emitted as a slide.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides ``'text'``.
        output_file: Path of the .pptx file to create.
        tokenizer: Tokenizer passed through to ``translate_text``; only used
            when ``translation_model`` is given.
        translation_model: When truthy, each segment's text is translated
            with ``translate_text`` before being written.
    """
    ppt = Presentation()
    max_chars_per_slide = 400
    text_buffer = ""

    for index, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        line = f"{index}. {text.strip()}\n"
        # Flush only when something is buffered: a single line longer than
        # the limit must not produce an empty slide in front of it.
        if text_buffer and len(text_buffer) + len(line) > max_chars_per_slide:
            _add_transcript_slide(ppt, text_buffer.strip())
            text_buffer = line
        else:
            text_buffer += line

    if text_buffer:
        # The final slide gets the same title as every other slide.
        _add_transcript_slide(ppt, text_buffer.strip())
    else:
        # No segments: keep producing a deck with one blank slide, as before.
        ppt.slides.add_slide(ppt.slide_layouts[5])

    ppt.save(output_file)
184
+
185
+ # Transcribing video and generating output
186
def transcribe_video(video_file, language, target_language, model_name, output_format):
    """Transcribe a video with Whisper and export the result.

    Args:
        video_file: Uploaded video; either an object exposing the path as
            ``.name`` or a plain path string.
        language: Language code passed to Whisper's ``transcribe``.
        target_language: Language code of the output text. Anything other
            than ``"en"`` loads a translation model via
            ``load_translation_model`` and translates every segment.
        model_name: Key into ``whisper_models`` selecting the Whisper model.
        output_format: One of ``"SRT"``, ``"Video with Hardsub"``,
            ``"Word"``, ``"PDF"`` or ``"PowerPoint"``.

    Returns:
        Path of the generated file, written next to the input video.

    Raises:
        ValueError: If no video was provided or ``output_format`` is not
            one of the supported values.
        RuntimeError: If the translation model cannot be loaded or the
            subtitles cannot be embedded into the video.
    """
    # Validate the cheap inputs first so a bad request fails before the
    # Whisper model is loaded and the video is transcribed.
    if video_file is None:
        raise ValueError("No video file provided. Please upload a video file.")
    if output_format not in ("SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"):
        raise ValueError("Invalid output format selected.")

    # Accept both an upload object carrying ``.name`` and a bare path string.
    video_file_path = getattr(video_file, "name", video_file)

    actual_model_name = whisper_models[model_name]
    model = whisper.load_model(actual_model_name)

    result = model.transcribe(video_file_path, language=language)
    video_name = os.path.splitext(video_file_path)[0]

    if target_language != "en":
        try:
            tokenizer, translation_model = load_translation_model(target_language)
        except Exception as e:
            raise RuntimeError(f"Error loading translation model: {e}") from e
    else:
        tokenizer, translation_model = None, None

    # The SRT file is only needed for the two subtitle outputs. Writing it
    # unconditionally would translate every segment a second time for the
    # document formats.
    if output_format in ("SRT", "Video with Hardsub"):
        srt_file = f"{video_name}.srt"
        write_srt(result, srt_file, tokenizer, translation_model)
        if output_format == "SRT":
            return srt_file

        output_video = f"{video_name}_with_subtitles.mp4"
        try:
            embed_hardsub_in_video(video_file_path, srt_file, output_video)
        except Exception as e:
            raise RuntimeError(f"Error embedding subtitles in video: {e}") from e
        return output_video

    if output_format == "Word":
        word_file = f"{video_name}.docx"
        write_word(result, word_file, tokenizer, translation_model, target_language)
        return word_file

    if output_format == "PDF":
        pdf_file = f"{video_name}.pdf"
        write_pdf(result, pdf_file, tokenizer, translation_model)
        return pdf_file

    # Only "PowerPoint" remains after the validation above.
    ppt_file = f"{video_name}.pptx"
    write_ppt(result, ppt_file, tokenizer, translation_model)
    return ppt_file
231
 
232
  # Gradio Interface setup
233
  iface = gr.Interface(