rafaaa2105 committed on
Commit
3fb91e0
·
verified ·
1 Parent(s): c0bb918

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -0
app.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ import moviepy.editor as mp
4
+ from moviepy.video.tools.subtitles import SubtitlesClip
5
+ from datetime import timedelta
6
+ import os
7
+ from transformers import MarianMTModel, MarianTokenizer
8
+ import torch
9
+ import spaces
10
+
11
# Display name offered in the "Target Language" dropdown -> language code
# that is passed on to build the MarianMT model name. Insertion order is the
# order in which the dropdown lists its choices.
LANGUAGE_CODES = dict(
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
    Portuguese="pt",
    Russian="ru",
    Chinese="zh",
    Japanese="ja",
    Korean="ko",
)
24
+
25
+ # Mapping of language pairs to MarianMT model names
26
def get_model_name(source_lang, target_lang):
    """Return the Helsinki-NLP ``opus-mt`` model id for a language pair.

    Args:
        source_lang: Language code of the text to translate from.
        target_lang: Language code of the text to translate to.

    Returns:
        A string of the form ``Helsinki-NLP/opus-mt-<source>-<target>``.
        No check is made here that such a checkpoint actually exists.
    """
    template = "Helsinki-NLP/opus-mt-{}-{}"
    return template.format(source_lang, target_lang)
28
+
29
def format_timestamp(seconds):
    """Convert an offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The formatted timestamp. Hours are not wrapped at 24, so offsets of
        a day or more keep increasing (90000 s -> ``25:00:00,000``) instead
        of silently restarting at ``00:``.
    """
    # Going through timedelta keeps the original sub-second behaviour
    # (microsecond rounding, then truncation to whole milliseconds), while
    # the floor division counts *all* milliseconds, including whole days.
    total_ms = timedelta(seconds=max(seconds, 0)) // timedelta(milliseconds=1)
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
37
+
38
# Loaded (tokenizer, model) pairs keyed by checkpoint name. Subtitles are
# translated one segment at a time, so without this cache every segment
# would reload the same checkpoint from disk.
_MARIAN_CACHE = {}


def _load_marian(model_name):
    """Return the (tokenizer, model) pair for *model_name*, loading it once."""
    if model_name not in _MARIAN_CACHE:
        _MARIAN_CACHE[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _MARIAN_CACHE[model_name]


def translate_text(text, source_lang, target_lang):
    """Translate *text* with a MarianMT model, falling back to the input.

    Args:
        text: The text to translate.
        source_lang: Language code of *text*.
        target_lang: Language code to translate into.

    Returns:
        The translated string. *text* is returned unchanged when the two
        languages are equal, when *text* is empty or whitespace-only, or
        when loading the model or translating fails for any reason (the
        error is printed; this is a deliberate best-effort fallback so one
        bad segment does not abort subtitle generation).
    """
    # Nothing to do for same-language or blank input; skip the model entirely.
    if source_lang == target_lang or not text or not text.strip():
        return text

    try:
        tokenizer, model = _load_marian(get_model_name(source_lang, target_lang))

        # Inputs longer than 512 tokens are truncated by the tokenizer.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        translated = model.generate(**inputs)
        return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
    except Exception as e:
        print(f"Translation error: {e}")
        return text  # Return original text if translation fails
57
+
58
def create_srt(segments, target_lang="en"):
    """Render transcription segments as the text of an SRT subtitle file.

    Args:
        segments: Iterable of dicts with ``'start'``, ``'end'`` and
            ``'text'`` keys, and optionally a ``'language'`` key.
        target_lang: Language code the cues should be written in. A segment
            is translated only when it carries a ``'language'`` value that
            differs from this code.

    Returns:
        The SRT document as one string: cues numbered from 1, each followed
        by a blank line. An empty *segments* yields an empty string.
    """
    cues = []
    for index, segment in enumerate(segments, start=1):
        cue_start = format_timestamp(segment['start'])
        cue_end = format_timestamp(segment['end'])
        cue_text = segment['text'].strip()

        needs_translation = (
            'language' in segment and segment['language'] != target_lang
        )
        if needs_translation:
            cue_text = translate_text(cue_text, segment['language'], target_lang)

        cues.append(f"{index}\n{cue_start} --> {cue_end}\n{cue_text}\n\n")

    return "".join(cues)
72
+
73
def create_subtitle_clips(segments, videosize, target_lang="en"):
    """Build one moviepy text clip per segment for burning in subtitles.

    Args:
        segments: Iterable of dicts with ``'start'``, ``'end'`` and
            ``'text'`` keys, and optionally a ``'language'`` key.
        videosize: ``(width, height)`` of the video the clips are overlaid
            on; only the width is used, to set the caption wrap width.
        target_lang: Language code the subtitles should be shown in. A
            segment is translated only when it carries a ``'language'``
            value that differs from this code.

    Returns:
        A list of text clips, each positioned at the bottom centre and
        carrying its segment's start time and duration. Segments with blank
        text or a non-positive duration produce no clip.
    """
    subtitle_clips = []
    # Wrap captions at the video width but let the height be auto-sized.
    # Passing the full (width, height) makes every text clip as large as the
    # frame, so the text is centred mid-frame and the 'bottom' position
    # below has no visible effect.
    caption_size = (videosize[0], None)

    for segment in segments:
        start_time = segment['start']
        duration = segment['end'] - start_time
        text = segment['text'].strip()

        # There is nothing to render for blank text or a zero/negative-length
        # cue, so skip it rather than build a clip for it.
        if not text or duration <= 0:
            continue

        # Translate if target language is different
        if 'language' in segment and segment['language'] != target_lang:
            text = translate_text(text, segment['language'], target_lang)

        text_clip = mp.TextClip(
            text,
            font='Arial',
            fontsize=24,
            color='white',
            stroke_color='black',
            stroke_width=1,
            size=caption_size,
            method='caption',
        ).set_position(('center', 'bottom'))

        subtitle_clips.append(
            text_clip.set_start(start_time).set_duration(duration)
        )

    return subtitle_clips
102
+
103
@spaces.GPU
def process_video(video_path, target_lang="en"):
    """Transcribe a video, write an SRT file, and burn subtitles into a copy.

    Args:
        video_path: Path of the input video file.
        target_lang: Language code for the subtitles. Segments are
            translated when the language detected by Whisper differs.

    Returns:
        ``(output_video_path, srt_path)``: the subtitled ``.mp4`` and the
        ``.srt`` file. Both are written to the current working directory
        and named after the input file and *target_lang*.

    Raises:
        ValueError: If the video has no audio track to transcribe.
    """
    import tempfile  # local import: only this function needs it

    # Load Whisper model
    model = whisper.load_model("base")

    video = mp.VideoFileClip(video_path)
    final_video = None
    temp_audio_path = None
    try:
        audio = video.audio
        if audio is None:
            raise ValueError("The video has no audio track to transcribe.")

        # Use a unique temp file: a fixed name in the working directory
        # would be overwritten or deleted by a concurrent request.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        audio.write_audiofile(temp_audio_path)

        # Transcribe audio
        result = model.transcribe(temp_audio_path)

        # Tag every segment with the detected language so the subtitle
        # builders can decide whether translation is needed.
        for segment in result["segments"]:
            segment['language'] = result["language"]

        # Create and save the SRT file (translated if needed)
        srt_content = create_srt(result["segments"], target_lang)
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        srt_path = f"{video_name}_subtitles_{target_lang}.srt"
        with open(srt_path, "w", encoding="utf-8") as f:
            f.write(srt_content)

        # Overlay the subtitle clips on the original video
        subtitle_clips = create_subtitle_clips(result["segments"], video.size, target_lang)
        final_video = mp.CompositeVideoClip([video] + subtitle_clips)

        # Save final video
        output_video_path = f"{video_name}_with_subtitles_{target_lang}.mp4"
        final_video.write_videofile(output_video_path)

        return output_video_path, srt_path
    finally:
        # Clean up on both success and failure so a failed request does not
        # leave a temp file or open clip handles behind.
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        if final_video is not None:
            final_video.close()
        video.close()
149
+
150
def gradio_interface(video_file, target_language):
    """Gradio callback: subtitle the uploaded video in the chosen language.

    Args:
        video_file: The uploaded video as delivered by the ``gr.Video``
            input. A plain path string is used as-is; an object exposing a
            ``.name`` path attribute is also accepted.
        target_language: Display name of the target language; must be a key
            of ``LANGUAGE_CODES``.

    Returns:
        ``(output_video_path, srt_path)`` for the Video and File outputs.

    Raises:
        gr.Error: If no video was supplied or processing fails, so the
            message is reported as an error rather than being returned as
            if it were a video path.
    """
    if video_file is None:
        raise gr.Error("Please upload a video first.")

    # gr.Video is documented to pass the upload as a str file path, on which
    # `.name` raises AttributeError; fall back to the value itself.
    video_path = getattr(video_file, "name", video_file)

    try:
        target_lang = LANGUAGE_CODES[target_language]
        return process_video(video_path, target_lang)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        raise gr.Error(str(e)) from e
159
+
160
# Gradio UI: a video upload and a target-language dropdown as inputs; the
# subtitled video and the generated SRT file as outputs.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(
            choices=list(LANGUAGE_CODES),
            value="English",
            label="Target Language",
        ),
    ],
    outputs=[
        gr.Video(label="Video with Subtitles"),
        gr.File(label="SRT Subtitle File"),
    ],
    title="Video Subtitler with Translation",
    description="Upload a video to generate subtitles, translate them to your chosen language, and embed them directly in the video.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()