Spaces:
Running
on
Zero
Running
on
Zero
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import tempfile
from datetime import timedelta

import gradio as gr
import moviepy.editor as mp
import spaces
import torch
import whisper
from moviepy.video.tools.subtitles import SubtitlesClip
from transformers import MarianMTModel, MarianTokenizer
10 |
+
|
# Dictionary of supported languages and their codes for MarianMT.
# Keys are the display names shown in the Gradio dropdown; values are the
# ISO 639-1 codes used to build "Helsinki-NLP/opus-mt-<src>-<tgt>" names.
# NOTE(review): not every pair below has a published opus-mt checkpoint —
# translate_text falls back to the original text when loading fails.
LANGUAGE_CODES = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Russian": "ru",
    "Chinese": "zh",
    "Japanese": "ja",
    "Korean": "ko"
}
24 |
+
|
# Mapping of language pairs to MarianMT model names
def get_model_name(source_lang, target_lang):
    """Return the Helsinki-NLP MarianMT checkpoint id for a language pair."""
    return "Helsinki-NLP/opus-mt-{}-{}".format(source_lang, target_lang)
28 |
+
|
def format_timestamp(seconds):
    """Convert a time offset in seconds to an SRT timestamp "HH:MM:SS,mmm".

    The previous implementation read ``timedelta.seconds``, which silently
    drops the ``days`` component, so any offset of 24 hours or more wrapped
    around (e.g. 90000 s rendered as "01:00:00,000"). Working directly in
    integer milliseconds avoids that.

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        str: Timestamp formatted for SRT, milliseconds truncated.
    """
    total_ms = int(seconds * 1000)  # truncate, matching the old //1000 behavior
    hours, rem = divmod(total_ms, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    secs, milliseconds = divmod(rem, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
37 |
+
|
# Cache of loaded (tokenizer, model) pairs keyed by checkpoint name, so the
# per-segment calls below don't re-download/re-load the weights every time.
_MT_CACHE = {}

def translate_text(text, source_lang, target_lang):
    """Translate ``text`` from ``source_lang`` to ``target_lang`` via MarianMT.

    Args:
        text: Source text (a single subtitle segment).
        source_lang: ISO 639-1 code of the source language.
        target_lang: ISO 639-1 code of the target language.

    Returns:
        str: The translated text, or the original ``text`` unchanged when the
        languages match or when loading/translation fails (best-effort).
    """
    if source_lang == target_lang:
        return text

    try:
        model_name = get_model_name(source_lang, target_lang)
        # The original code rebuilt tokenizer and model on every call; loading
        # MarianMT weights per segment dominates runtime, so cache them.
        if model_name not in _MT_CACHE:
            _MT_CACHE[model_name] = (
                MarianTokenizer.from_pretrained(model_name),
                MarianMTModel.from_pretrained(model_name),
            )
        tokenizer, model = _MT_CACHE[model_name]

        # Tokenize and translate
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        translated = model.generate(**inputs)
        translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

        return translated_text
    except Exception as e:
        print(f"Translation error: {e}")
        return text  # Return original text if translation fails
57 |
+
|
def create_srt(segments, target_lang="en"):
    """Render whisper segments as an SRT document, translating if needed.

    Args:
        segments: Iterable of whisper segment dicts with 'start', 'end',
            'text', and optionally 'language' keys.
        target_lang: ISO 639-1 code the subtitles should end up in.

    Returns:
        str: Full SRT file content ("index\\ntimestamps\\ntext\\n\\n" blocks).
    """
    # Collect entries and join once — repeated ``srt_content += ...`` copies
    # the growing string on every segment (quadratic for long videos).
    entries = []
    for i, segment in enumerate(segments, start=1):
        start_time = format_timestamp(segment['start'])
        end_time = format_timestamp(segment['end'])
        text = segment['text'].strip()

        # Translate if target language is different
        if 'language' in segment and segment['language'] != target_lang:
            text = translate_text(text, segment['language'], target_lang)

        entries.append(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")
    return "".join(entries)
72 |
+
|
def create_subtitle_clips(segments, videosize, target_lang="en"):
    """Build one positioned moviepy TextClip per transcript segment.

    Each clip is white Arial text with a black outline, anchored at the
    bottom center, timed to its segment, and translated to ``target_lang``
    when the segment's language differs.

    Args:
        segments: Whisper segment dicts ('start', 'end', 'text', 'language').
        videosize: (width, height) of the target video frame.
        target_lang: ISO 639-1 code for the subtitle language.

    Returns:
        list: TextClip objects ready for CompositeVideoClip.
    """
    clips = []
    for seg in segments:
        caption = seg['text'].strip()

        # Translate if target language is different
        if 'language' in seg and seg['language'] != target_lang:
            caption = translate_text(caption, seg['language'], target_lang)

        styled = mp.TextClip(
            caption,
            font='Arial',
            fontsize=24,
            color='white',
            stroke_color='black',
            stroke_width=1,
            size=videosize,
            method='caption'
        ).set_position(('center', 'bottom'))

        timed = styled.set_start(seg['start']).set_duration(seg['end'] - seg['start'])
        clips.append(timed)

    return clips
102 |
+
|
@spaces.GPU
def process_video(video_path, target_lang="en"):
    """Transcribe a video, translate the transcript, and burn in subtitles.

    Pipeline: extract audio -> whisper transcription -> tag segments with the
    detected language -> write an .srt file -> composite subtitle clips onto
    the video.

    Fixes over the original: the temp audio file was the fixed name
    "temp_audio.wav" (concurrent requests clobbered each other), and cleanup
    only ran on success, leaking the temp file and clip handles on error.

    Args:
        video_path: Path to the input video file.
        target_lang: ISO 639-1 code for the subtitle language.

    Returns:
        tuple: (path to the subtitled .mp4, path to the generated .srt).
    """
    # Load Whisper model
    model = whisper.load_model("base")

    # Extract audio from video
    video = mp.VideoFileClip(video_path)
    final_video = None

    # Unique temp path so concurrent requests don't overwrite each other.
    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # moviepy writes by path; we don't need the open descriptor
    try:
        video.audio.write_audiofile(temp_audio_path)

        # Transcribe audio
        result = model.transcribe(temp_audio_path)

        # Whisper reports one language for the whole file; tag each segment so
        # create_srt/create_subtitle_clips can decide whether to translate.
        for segment in result["segments"]:
            segment['language'] = result["language"]

        # Create SRT content with translation
        srt_content = create_srt(result["segments"], target_lang)

        # Save SRT file
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        srt_path = f"{video_name}_subtitles_{target_lang}.srt"
        with open(srt_path, "w", encoding="utf-8") as f:
            f.write(srt_content)

        # Create subtitle clips with translation
        subtitle_clips = create_subtitle_clips(result["segments"], video.size, target_lang)

        # Combine video with subtitles
        final_video = mp.CompositeVideoClip([video] + subtitle_clips)

        # Save final video
        output_video_path = f"{video_name}_with_subtitles_{target_lang}.mp4"
        final_video.write_videofile(output_video_path)
    finally:
        # Clean up even when any step above raises.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        video.close()
        if final_video is not None:
            final_video.close()

    return output_video_path, srt_path
149 |
+
|
def gradio_interface(video_file, target_language):
    """Gradio callback: resolve the upload and run the subtitling pipeline.

    Args:
        video_file: The uploaded video — a filepath string on current Gradio
            versions, or a tempfile-like object with ``.name`` on older ones
            (the original code only handled the latter and crashed on a str).
        target_language: Display name key into LANGUAGE_CODES.

    Returns:
        tuple: (output video path, srt path) on success, or
        (error message string, None) on failure.
    """
    try:
        # Accept both the modern str payload and the legacy file object.
        video_path = video_file if isinstance(video_file, str) else video_file.name
        target_lang = LANGUAGE_CODES[target_language]
        return process_video(video_path, target_lang)
    except Exception as e:
        # Surface the error text in the UI instead of crashing the app.
        return str(e), None
159 |
+
|
# Create Gradio interface: one video upload plus a target-language dropdown
# (defaulting to English); outputs the subtitled video and the .srt file
# produced by gradio_interface.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(
            choices=list(LANGUAGE_CODES.keys()),  # display names, not codes
            value="English",
            label="Target Language"
        )
    ],
    outputs=[
        gr.Video(label="Video with Subtitles"),
        gr.File(label="SRT Subtitle File")
    ],
    title="Video Subtitler with Translation",
    description="Upload a video to generate subtitles, translate them to your chosen language, and embed them directly in the video."
)
178 |
+
|
if __name__ == "__main__":
    # Start the Gradio server when executed as a script.
    iface.launch()