younes21000 committed on
Commit
7e7b0c1
·
verified ·
1 Parent(s): 8fd23af

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -290
app.py DELETED
@@ -1,290 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import whisper
4
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
- from docx import Document
6
- from reportlab.pdfgen import canvas
7
- from reportlab.pdfbase.ttfonts import TTFont
8
- from reportlab.pdfbase import pdfmetrics
9
- from reportlab.lib.pagesizes import A4
10
- import arabic_reshaper
11
- from bidi.algorithm import get_display
12
- from pptx import Presentation
13
- import subprocess
14
- import shlex
15
- import yt_dlp
16
- from selenium import webdriver
17
- from selenium.webdriver.common.by import By
18
- from selenium.webdriver.chrome.service import Service as ChromeService
19
- from webdriver_manager.chrome import ChromeDriverManager
20
- from selenium.webdriver.support.ui import WebDriverWait
21
- from selenium.webdriver.support import expected_conditions as EC
22
- import time
23
-
24
# Whisper speech-to-text model, loaded once when the module is imported.
# The "tiny" checkpoint is used because the smaller model transcribes faster.
model = whisper.load_model("tiny")
26
-
27
def load_translation_model(target_language):
    """Load the M2M100 tokenizer and model set up for English -> target.

    Args:
        target_language: One of "fa", "es", "fr", "de", "it" or "pt".

    Returns:
        A ``(tokenizer, translation_model)`` tuple. The tokenizer has
        ``src_lang`` set to ``"en"`` and ``tgt_lang`` set to the target code.

    Raises:
        ValueError: If ``target_language`` is not a supported code.
    """
    # Requested code -> M2M100 code (currently an identity mapping).
    supported = {
        "fa": "fa",  # Persian (Farsi)
        "es": "es",  # Spanish
        "fr": "fr",  # French
        "de": "de",  # German
        "it": "it",  # Italian
        "pt": "pt",  # Portuguese
    }
    code = supported.get(target_language)
    if not code:
        raise ValueError(f"Translation model for {target_language} not supported")

    checkpoint = "facebook/m2m100_418M"
    tokenizer = M2M100Tokenizer.from_pretrained(checkpoint)
    translation_model = M2M100ForConditionalGeneration.from_pretrained(checkpoint)

    tokenizer.src_lang, tokenizer.tgt_lang = "en", code
    return tokenizer, translation_model
48
-
49
def translate_text(text, tokenizer, model):
    """Translate ``text`` into the tokenizer's configured target language.

    Args:
        text: The source text to translate.
        tokenizer: Tokenizer exposing ``tgt_lang``, ``get_lang_id`` and
            ``decode``; it is also called directly to encode ``text``.
        model: Model whose ``generate`` method produces the translation.

    Returns:
        The decoded first generated sequence, with special tokens skipped.

    Raises:
        RuntimeError: If any step of encoding, generation or decoding fails.
    """
    try:
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # Force generation to start with the target-language token.
        target_id = tokenizer.get_lang_id(tokenizer.tgt_lang)
        generated = model.generate(**encoded, forced_bos_token_id=target_id)
        first_sequence = generated[0]
        return tokenizer.decode(first_sequence, skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error during translation: {e}")
56
-
57
def format_timestamp(seconds):
    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split into
    fields, so binary floating-point noise (for example ``1.001`` being
    stored as ``1.000999...``) does not drop a millisecond, and a value such
    as ``59.9996`` carries correctly into the next minute.

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The timestamp string. Negative offsets are clamped to
        ``"00:00:00,000"`` because SRT has no negative timestamps.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    whole_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
65
-
66
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription segments to ``output_file`` in SRT format.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'start'``, ``'end'`` and ``'text'``.
        output_file: Path of the SRT file to create or overwrite.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, every segment's text is translated
            with ``translate_text`` before being written.
    """
    # Write UTF-8 explicitly: translated text may be non-Latin (e.g. Persian)
    # and the platform default encoding cannot always represent it.
    with open(output_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(transcription['segments'], start=1):
            text = segment['text']
            if translation_model:
                text = translate_text(text, tokenizer, translation_model)

            start_time = format_timestamp(segment['start'])
            end_time = format_timestamp(segment['end'])

            # One cue: sequence number, time range, text, blank separator.
            f.write(f"{index}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text.strip()}\n\n")
82
-
83
def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Burn the subtitles in ``srt_file`` into ``video_file`` using ffmpeg.

    The video is re-encoded with libx264 (CRF 23, ``medium`` preset) and
    written to ``output_video``.

    Args:
        video_file: Path of the input video.
        srt_file: Path of the SRT file handed to ffmpeg's ``subtitles`` filter.
        output_video: Path of the video file to produce.

    Raises:
        RuntimeError: If ffmpeg cannot be started, runs longer than 300
            seconds, or exits with a non-zero status (stderr is included).
    """
    # Pass an argument list instead of formatting a string and re-splitting
    # it, so paths containing spaces or double quotes reach ffmpeg intact.
    command = [
        "ffmpeg",
        "-i", str(video_file),
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        str(output_video),
    ]
    # Keep the try body to the call that can fail to launch or time out, so
    # the exit-status error below is not wrapped a second time.
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except Exception as e:
        raise RuntimeError(f"Error running ffmpeg: {e}") from e

    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")
93
-
94
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write the transcription as numbered paragraphs in a Word document.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the ``.docx`` file to save.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, each segment is translated first.
        target_language: When equal to ``"fa"``, paragraphs are flagged as
            right-to-left.
    """
    document = Document()
    is_rtl = target_language == "fa"

    for number, segment in enumerate(transcription['segments'], start=1):
        content = segment['text']
        if translation_model:
            content = translate_text(content, tokenizer, translation_model)

        paragraph = document.add_paragraph(f"{number}. {content.strip()}")
        if is_rtl:
            # NOTE(review): confirm the installed python-docx acts on this
            # attribute of the paragraph format.
            paragraph.paragraph_format.right_to_left = True

    document.save(output_file)
105
-
106
def reverse_text_for_rtl(text):
    """Reverse the characters inside each whitespace-separated word.

    Word order is kept, and the words are re-joined with single spaces, so
    leading, trailing and repeated whitespace is not preserved.
    """
    flipped_words = (''.join(reversed(word)) for word in text.split())
    return ' '.join(flipped_words)
108
-
109
def _register_pdf_font(font_name, font_path):
    """Register the TrueType font at ``font_path`` under ``font_name``."""
    if not os.path.exists(font_path):
        raise FileNotFoundError(
            f"{font_name} font file not found at {font_path}. Please ensure it is available."
        )
    try:
        pdfmetrics.registerFont(TTFont(font_name, font_path))
    except Exception as e:
        raise RuntimeError(f"Error registering {font_name} font: {e}.") from e


def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription as numbered lines in an A4 PDF.

    The fonts ``B-NAZANIN.TTF`` and ``Arial.ttf`` are loaded from the
    directory containing this file. When translating into ``fa`` or ``ar``
    each line is reshaped, reordered for display and drawn right-aligned in
    B-Nazanin; otherwise it is drawn left-aligned in Arial.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the PDF to create.
        tokenizer: Tokenizer passed to ``translate_text``; its ``tgt_lang``
            selects the text direction when a translation model is given.
        translation_model: When truthy, each segment is translated first.

    Returns:
        ``output_file``.

    Raises:
        FileNotFoundError: If either font file is missing.
        RuntimeError: If a font cannot be registered.
    """
    c = canvas.Canvas(output_file, pagesize=A4)
    app_dir = os.path.dirname(os.path.abspath(__file__))
    _register_pdf_font('B-Nazanin', os.path.join(app_dir, 'B-NAZANIN.TTF'))
    _register_pdf_font('Arial', os.path.join(app_dir, 'Arial.ttf'))

    page_width, page_height = A4
    margin = 50
    line_height = 20

    # The target language does not change between segments; resolve it once.
    target_language = tokenizer.tgt_lang if translation_model else None
    is_rtl = target_language in ['fa', 'ar']

    y_position = page_height - margin

    for i, segment in enumerate(transcription['segments']):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        line = f"{i + 1}. {text.strip()}"

        # Start a new page *before* drawing, so no line lands below the
        # bottom margin and every page starts at the same height.
        if y_position < margin:
            c.showPage()
            y_position = page_height - margin

        # The font is set for every line because it must be re-applied
        # after a page break.
        if is_rtl:
            reshaped_text = arabic_reshaper.reshape(line)
            bidi_text = get_display(reshaped_text)
            c.setFont('B-Nazanin', 12)
            c.drawRightString(page_width - margin, y_position, bidi_text)
        else:
            c.setFont('Arial', 12)
            c.drawString(margin, y_position, line)

        y_position -= line_height

    c.save()
    return output_file
163
-
164
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription to a PowerPoint file, about 400 chars a slide.

    Numbered lines are accumulated until adding the next one would exceed
    the per-slide limit; the accumulated text is then placed on the current
    slide (titled "Transcription") and a new slide is started. Whatever text
    remains at the end goes on the last slide, whose title is set to "".

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the ``.pptx`` file to save.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, each segment is translated first.
    """
    deck = Presentation()
    char_limit = 400

    def new_slide():
        # Layout index 5 is used for every slide.
        return deck.slides.add_slide(deck.slide_layouts[5])

    def fill(slide, title, body):
        # Title plus a text box spanning the whole slide.
        slide.shapes.title.text = title
        box = slide.shapes.add_textbox(
            left=0, top=0, width=deck.slide_width, height=deck.slide_height
        )
        box.text = body.strip()

    current = new_slide()
    pending = ""

    for number, segment in enumerate(transcription['segments'], start=1):
        content = segment['text']
        if translation_model:
            content = translate_text(content, tokenizer, translation_model)

        entry = f"{number}. {content.strip()}\n"

        if len(pending) + len(entry) <= char_limit:
            pending += entry
            continue

        # The entry does not fit: flush the buffer and start a fresh slide.
        fill(current, "Transcription", pending)
        current = new_slide()
        pending = entry

    if pending:
        fill(current, "", pending)

    deck.save(output_file)
194
-
195
def download_from_ssyoutube(modified_url):
    """Download a video through the ssyoutube page and return its file path.

    A Chrome session opens ``modified_url``, clicks the link containing
    "Low quality" and then the link containing "Download". Chrome is
    configured to save into a fresh temporary directory so the resulting
    file can be located and handed to the transcription step.

    Args:
        modified_url: The ssyoutube URL to open.

    Returns:
        Path of the downloaded file.

    Raises:
        RuntimeError: If a link cannot be clicked, or no finished download
            appears within the wait period.
    """
    import glob
    import tempfile

    download_timeout = 300  # seconds to wait for the file to be complete

    # Download into a directory we control; otherwise the file lands in the
    # browser's default location under a name this function cannot know.
    download_dir = tempfile.mkdtemp(prefix="ssyoutube_")
    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": download_dir,
            "download.prompt_for_download": False,
        },
    )

    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager().install()),
        options=options,
    )
    try:
        driver.get(modified_url)

        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Low quality"))
        ).click()

        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Download"))
        ).click()

        # Poll instead of sleeping a fixed time. Chrome keeps in-progress
        # downloads under a temporary ".crdownload"/".tmp" name.
        deadline = time.monotonic() + download_timeout
        while time.monotonic() < deadline:
            finished = [
                path
                for path in glob.glob(os.path.join(download_dir, "*"))
                if not path.endswith((".crdownload", ".tmp"))
            ]
            if finished:
                return finished[0]
            time.sleep(1)

        raise TimeoutError("download did not finish in time")
    except Exception as e:
        raise RuntimeError(f"Failed to download video: {e}") from e
    finally:
        # Always release the browser, including when driver.get() fails.
        driver.quit()
216
-
217
def modify_youtube_url(url):
    """Rewrite a YouTube URL so that it points at the "ssyoutube" host.

    Everything before the first occurrence of "youtube" (scheme, "www.",
    and so on) is replaced with "https://ss".

    Args:
        url: The original URL.

    Returns:
        The rewritten URL, e.g. ``https://ssyoutube.com/watch?v=...``.

    Raises:
        ValueError: If ``url`` does not contain "youtube".
    """
    _, marker, remainder = url.partition("youtube")
    if not marker:
        raise ValueError("Invalid YouTube URL.")
    return f"https://ss{marker}{remainder}"
224
-
225
def download_youtube_video(url):
    """Download the video behind a YouTube URL via the ssyoutube page.

    Args:
        url: The YouTube URL supplied by the user.

    Returns:
        The value returned by ``download_from_ssyoutube``.

    Raises:
        RuntimeError: If rewriting the URL or the download itself fails.
    """
    try:
        return download_from_ssyoutube(modify_youtube_url(url))
    except Exception as e:
        raise RuntimeError(f"Error downloading YouTube video: {e}")
231
-
232
def transcribe_video(video_file, video_url, language, target_language, output_format):
    """Transcribe a video and write the result in the requested format.

    Args:
        video_file: Path of an uploaded video; used when ``video_url`` is
            empty.
        video_url: YouTube URL to download first; takes precedence over
            ``video_file`` when non-empty.
        language: Language of the video. Currently not used by this function.
        target_language: ``"en"`` to keep the transcription as is; any other
            value loads a translation model for that language.
        output_format: One of ``"SRT"``, ``"Word"``, ``"PDF"`` or ``"PPT"``.

    Returns:
        The name of the file written (``output.srt``, ``output.docx``,
        ``output.pdf`` or ``output.pptx``), or ``None`` when
        ``output_format`` is not one of the supported values.

    Raises:
        ValueError: If neither ``video_url`` nor ``video_file`` is given.
    """
    output_names = {
        "SRT": "output.srt",
        "Word": "output.docx",
        "PDF": "output.pdf",
        "PPT": "output.pptx",
    }

    # Check the cheap preconditions before downloading or transcribing.
    if output_format not in output_names:
        # Same result as before (no file), without the wasted work.
        return None
    if not video_url and not video_file:
        raise ValueError("Provide either a YouTube URL or an uploaded video file.")

    video_file_path = download_youtube_video(video_url) if video_url else video_file

    transcription = model.transcribe(video_file_path)

    if target_language != "en":
        tokenizer, translation_model = load_translation_model(target_language)
    else:
        tokenizer, translation_model = None, None

    output_file = output_names[output_format]

    if output_format == "SRT":
        write_srt(transcription, output_file, tokenizer, translation_model)
    elif output_format == "Word":
        write_word(transcription, output_file, tokenizer, translation_model, target_language)
    elif output_format == "PDF":
        write_pdf(transcription, output_file, tokenizer, translation_model)
    else:  # "PPT"
        write_ppt(transcription, output_file, tokenizer, translation_model)

    return output_file
261
-
262
-
263
def main():
    """Build the Gradio interface and launch it."""
    with gr.Blocks() as app:
        gr.Markdown("# Transcribe, Translate and Format YouTube Video Content")

        # Inputs, created in display order.
        url_box = gr.Textbox(label="YouTube Video URL (or leave blank for video file upload)")
        upload_box = gr.File(label="Upload Video File (leave blank for YouTube URL)")
        source_language = gr.Dropdown(choices=["en"], label="Video Language", value="en")
        destination_language = gr.Dropdown(
            choices=["en", "fa", "es", "fr", "de", "it", "pt"],
            label="Target Language",
            value="en",
        )
        format_choice = gr.Dropdown(
            choices=["SRT", "Word", "PDF", "PPT"],
            label="Output Format",
            value="SRT",
        )

        result_file = gr.File(label="Download Transcription", interactive=False)

        run_button = gr.Button("Transcribe & Translate")

        def transcribe_and_translate(video_file, video_url, language, target_language, output_format):
            # Use the upload's ``.name`` attribute as the path, if a file
            # was provided.
            uploaded_path = video_file.name if video_file else None
            return transcribe_video(uploaded_path, video_url, language, target_language, output_format)

        run_button.click(
            transcribe_and_translate,
            inputs=[upload_box, url_box, source_language, destination_language, format_choice],
            outputs=result_file,
        )

    app.launch()


if __name__ == "__main__":
    main()