Artificial-superintelligence committed on
Commit
3369106
1 Parent(s): c56ed60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -278
app.py CHANGED
@@ -1,299 +1,196 @@
1
  import streamlit as st
2
- from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip
3
  import whisper
4
- from translate import Translator
5
  from gtts import gTTS
 
6
  import tempfile
7
  import os
8
  import numpy as np
9
- import shutil
10
- from pathlib import Path
11
- import time
12
-
13
- # Set page configuration
14
- st.set_page_config(
15
- page_title="Tamil Movie Dubber",
16
- page_icon="🎬",
17
- layout="wide"
18
- )
19
-
20
- # Custom CSS
21
- st.markdown("""
22
- <style>
23
- .stButton>button {
24
- width: 100%;
25
- border-radius: 5px;
26
- height: 3em;
27
- background-color: #FF4B4B;
28
- color: white;
29
- }
30
- .stProgress .st-bo {
31
- background-color: #FF4B4B;
32
- }
33
- </style>
34
- """, unsafe_allow_html=True)
35
-
36
- # Tamil voice configurations
37
- TAMIL_VOICES = {
38
- 'Female 1': {'name': 'ta-IN-PallaviNeural', 'style': 'normal'},
39
- 'Female 2': {'name': 'ta-IN-PallaviNeural', 'style': 'formal'},
40
- 'Male 1': {'name': 'ta-IN-ValluvarNeural', 'style': 'normal'},
41
- 'Male 2': {'name': 'ta-IN-ValluvarNeural', 'style': 'formal'}
42
- }
43
-
44
- class TamilTextProcessor:
45
- @staticmethod
46
- def normalize_tamil_text(text):
47
- """Normalize Tamil text for better pronunciation"""
48
- tamil_numerals = {'௦': '0', '௧': '1', '௨': '2', '௩': '3', '௪': '4',
49
- '௫': '5', '௬': '6', '௭': '7', '௮': '8', '௯': '9'}
50
- for tamil_num, eng_num in tamil_numerals.items():
51
- text = text.replace(tamil_num, eng_num)
52
- return text
53
-
54
- @staticmethod
55
- def process_for_tts(text):
56
- """Process Tamil text for TTS"""
57
- text = ''.join(char for char in text if ord(char) < 65535)
58
- text = ' '.join(text.split())
59
- return text
60
 
 
61
  @st.cache_resource
62
- def load_whisper_model():
63
- """Load Whisper model with caching"""
64
- return whisper.load_model("base")
65
-
66
- class VideoProcessor:
67
- def __init__(self):
68
- self.temp_dir = Path(tempfile.mkdtemp())
69
- self.whisper_model = load_whisper_model()
70
-
71
- def create_temp_path(self, suffix):
72
- """Create a temporary file path"""
73
- return str(self.temp_dir / f"temp_{os.urandom(4).hex()}{suffix}")
74
-
75
- def cleanup(self):
76
- """Clean up temporary directory"""
77
- try:
78
- shutil.rmtree(self.temp_dir)
79
- except Exception as e:
80
- st.warning(f"Cleanup warning: {e}")
81
-
82
- def transcribe_video(self, video_path):
83
- """Transcribe video audio using Whisper"""
84
- try:
85
- with VideoFileClip(video_path) as video:
86
- # Extract audio to temporary file
87
- audio_path = self.create_temp_path(".wav")
88
- video.audio.write_audiofile(audio_path, fps=16000, verbose=False, logger=None)
89
-
90
- # Check if audio file is not empty
91
- if os.path.getsize(audio_path) == 0:
92
- raise ValueError("Extracted audio file is empty")
93
-
94
- # Transcribe using Whisper
95
- result = self.whisper_model.transcribe(audio_path)
96
- return result["segments"], video.duration
97
-
98
- except Exception as e:
99
- raise Exception(f"Transcription error: {str(e)}")
100
-
101
- def translate_segments(self, segments):
102
- """Translate segments to Tamil"""
103
- translator = Translator(to_lang='ta')
104
- translated_segments = []
105
-
106
- for segment in segments:
107
- try:
108
- translated_text = translator.translate(segment["text"])
109
- translated_text = TamilTextProcessor.normalize_tamil_text(translated_text)
110
- translated_text = TamilTextProcessor.process_for_tts(translated_text)
111
-
112
- translated_segments.append({
113
- "text": translated_text,
114
- "start": segment["start"],
115
- "end": segment["end"],
116
- "duration": segment["end"] - segment["start"]
117
- })
118
- except Exception as e:
119
- st.warning(f"Translation warning for segment: {str(e)}")
120
- # Keep original text if translation fails
121
- translated_segments.append({
122
- "text": segment["text"],
123
- "start": segment["start"],
124
- "end": segment["end"],
125
- "duration": segment["end"] - segment["start"]
126
- })
127
-
128
- return translated_segments
129
 
130
- def generate_tamil_audio(self, text):
131
- """Generate Tamil audio using gTTS with rate limiting"""
132
- try:
133
- audio_path = self.create_temp_path(".mp3")
134
- tts = gTTS(text=text, lang='ta', slow=False)
135
- tts.save(audio_path)
136
- time.sleep(1) # Adding delay to avoid rate limit issues
137
- return audio_path
138
- except Exception as e:
139
- raise Exception(f"Audio generation error: {str(e)}")
140
 
141
- def create_subtitle_clip(self, txt, fontsize, color, size):
142
- """Create a subtitle clip"""
143
- return TextClip(
144
- txt=txt,
145
- fontsize=fontsize,
146
- color=color,
147
- bg_color='rgba(0,0,0,0.5)',
148
- size=size,
149
- method='caption'
150
- )
 
 
 
 
 
 
151
 
152
- def process_video(video_data, voice_type, generate_subtitles=True, subtitle_size=24, subtitle_color='white'):
153
- """Main video processing function"""
154
- processor = VideoProcessor()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- try:
157
- # Save uploaded video to temporary file
158
- input_path = processor.create_temp_path(".mp4")
159
- with open(input_path, "wb") as f:
160
- f.write(video_data)
161
-
162
- # Load video
163
- video = VideoFileClip(input_path)
164
-
165
- # Create progress tracking
166
- progress_text = st.empty()
167
- progress_bar = st.progress(0)
168
-
169
- # Step 1: Transcribe
170
- progress_text.text("Transcribing video...")
171
- segments, duration = processor.transcribe_video(input_path)
172
- progress_bar.progress(0.25)
173
-
174
- # Step 2: Translate
175
- progress_text.text("Translating to Tamil...")
176
- translated_segments = processor.translate_segments(segments)
177
- progress_bar.progress(0.50)
178
-
179
- # Step 3: Generate audio
180
- progress_text.text("Generating Tamil audio...")
181
- subtitle_clips = []
182
- audio_clips = []
183
-
184
- for i, segment in enumerate(translated_segments):
185
- # Generate audio
186
- audio_path = processor.generate_tamil_audio(segment["text"])
187
- audio_clip = AudioFileClip(audio_path)
188
- audio_clips.append(audio_clip.set_start(segment["start"]))
189
-
190
- # Create subtitle if enabled
191
- if generate_subtitles:
192
- subtitle_clip = processor.create_subtitle_clip(
193
- segment["text"],
194
- subtitle_size,
195
- subtitle_color,
196
- (video.w, None)
197
- )
198
- subtitle_clip = (subtitle_clip
199
- .set_position(('center', 'bottom'))
200
- .set_start(segment["start"])
201
- .set_duration(segment["duration"]))
202
- subtitle_clips.append(subtitle_clip)
203
-
204
- progress_bar.progress(0.50 + (0.4 * (i + 1) / len(translated_segments)))
205
-
206
- # Step 4: Combine everything
207
- progress_text.text("Creating final video...")
208
-
209
- # Combine audio clips
210
- final_audio = concatenate_audioclips(audio_clips)
211
-
212
- # Create final video
213
- if generate_subtitles:
214
- final_video = CompositeVideoClip([video, *subtitle_clips])
215
- else:
216
- final_video = video
217
-
218
- # Set audio
219
- final_video = final_video.set_audio(final_audio)
220
-
221
- # Write final video
222
- output_path = processor.create_temp_path(".mp4")
223
- final_video.write_videofile(
224
- output_path,
225
- codec='libx264',
226
- audio_codec='aac',
227
- temp_audiofile=processor.create_temp_path(".m4a"),
228
- remove_temp=True,
229
- verbose=False,
230
- logger=None
231
- )
232
-
233
- progress_bar.progress(1.0)
234
- progress_text.text("Processing complete!")
235
-
236
- return output_path
237
-
238
- except Exception as e:
239
- raise Exception(f"Video processing error: {str(e)}")
240
-
241
- finally:
242
- # Cleanup
243
- processor.cleanup()
244
 
245
  def main():
246
- st.title("Tamil Movie Dubbing System")
247
- st.markdown("""
248
- 👋 Welcome! This tool helps you:
249
- - 🎥 Convert English videos to Tamil
250
- - 🗣️ Generate Tamil voiceovers
251
- - 📝 Add Tamil subtitles
252
- """)
253
-
254
- # File uploader
255
- video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])
256
 
257
- if not video_file:
258
- st.warning("Please upload a video to begin.")
 
 
 
 
 
259
  return
 
 
 
260
 
261
- # Settings
262
- col1, col2 = st.columns(2)
263
-
264
- with col1:
265
- voice_type = st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))
266
-
267
- with col2:
268
- generate_subtitles = st.checkbox("Generate Subtitles", value=True)
269
-
270
- if generate_subtitles:
271
- col3, col4 = st.columns(2)
272
- with col3:
273
- subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
274
- with col4:
275
- subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")
276
-
277
- # Process video
278
- if st.button("Process Video"):
279
- with st.spinner("Processing video..."):
280
  try:
281
- output_video_path = process_video(
282
- video_file.read(),
283
- voice_type,
284
- generate_subtitles,
285
- subtitle_size,
286
- subtitle_color
287
- )
288
-
289
- st.video(output_video_path)
290
- st.success("Video processed successfully!")
291
-
292
- with open(output_video_path, "rb") as f:
293
- st.download_button("Download Processed Video", f, file_name="processed_video.mp4")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  except Exception as e:
296
- st.error(f"Error: {str(e)}")
297
-
 
298
  if __name__ == "__main__":
299
- main()
 
1
  import streamlit as st
2
+ from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
3
  import whisper
4
+ from transformers import MBartForConditionalGeneration, MBartTokenizer
5
  from gtts import gTTS
6
+ import torch
7
  import tempfile
8
  import os
9
  import numpy as np
10
+ from pydub import AudioSegment
11
+ import librosa
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Initialize models and configs
16
  @st.cache_resource
17
+ def load_models():
18
+ whisper_model = whisper.load_model("large")
19
+ tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
20
+ model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
21
+ return whisper_model, tokenizer, model
22
+
23
+ # Tamil language configuration
24
# Tamil-specific settings gathered in one place.
TAMIL_CONFIG = dict(
    code='ta',
    whisper_code='tamil',   # passed to Whisper as the ``language`` argument
    mbart_code='ta_IN',     # mBART language code for Tamil
    gtts_code='ta',         # passed to gTTS as the ``lang`` argument
    voice_speed=1.1,        # default of the "Voice Speed" slider; adjusts speed for better sync
    sample_rate=22050,      # fps used when the source audio is written to WAV
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
# Page-level Streamlit settings: browser-tab title, icon and wide layout.
st.set_page_config(
    page_title="Tamil Video Dubbing AI",
    page_icon="🎥",
    layout="wide",
)
 
 
 
 
 
 
 
 
35
 
36
def create_custom_style():
    """Inject the app's custom CSS into the page.

    The stylesheet sets the app background colour, pads the ``.main``
    container and restyles buttons. It is emitted through ``st.markdown``
    with ``unsafe_allow_html=True`` so the ``<style>`` tag is kept as HTML.
    """
    css_lines = (
        "",
        "<style>",
        ".stApp {",
        "background-color: #f5f5f5;",
        "}",
        ".main {",
        "padding: 2rem;",
        "}",
        ".stButton>button {",
        "background-color: #FF4B4B;",
        "color: white;",
        "font-weight: bold;",
        "}",
        "</style>",
        "",
    )
    st.markdown("\n".join(css_lines), unsafe_allow_html=True)


# Apply the stylesheet as soon as the script runs.
create_custom_style()
54
+
55
def _split_into_chunks(text, max_chars=400):
    """Split *text* into sentence-aligned chunks of at most about *max_chars*.

    Sentences are detected by whitespace following ``.``, ``!`` or ``?`` and
    packed greedily. A single sentence longer than *max_chars* is kept whole
    as its own chunk rather than being cut mid-sentence.
    """
    import re  # function-scope import keeps this block self-contained

    chunks = []
    current = ""
    for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
        if not sentence:
            continue
        candidate = f"{current} {sentence}" if current else sentence
        if current and len(candidate) > max_chars:
            chunks.append(current)
            current = sentence
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


def translate_text(text, tokenizer, model, src_lang=None, tgt_lang="ta_IN"):
    """Translate *text* with an mBART model, by default into Tamil.

    The text is translated chunk by chunk (sentence-aligned) so that long
    transcripts are not silently cut off at the 512-token input limit, and
    decoding uses plain beam search so the same input always yields the same
    translation.

    Args:
        text: Source text to translate. Empty or whitespace-only text yields
            an empty string without calling the model.
        tokenizer: Tokenizer matching *model*; must expose ``lang_code_to_id``.
        model: Model exposing ``generate``.
        src_lang: Optional mBART source-language code (e.g. ``"en_XX"``).
            When given it is assigned to ``tokenizer.src_lang``; when omitted
            the tokenizer's current source language is left untouched.
        tgt_lang: mBART target-language code; defaults to Tamil (``"ta_IN"``).

    Returns:
        The translated text, chunks joined by single spaces.

    Raises:
        KeyError: If *tgt_lang* is not a language code known to *tokenizer*.
    """
    if not text or not text.strip():
        return ""

    if src_lang is not None:
        tokenizer.src_lang = src_lang
    target_token_id = tokenizer.lang_code_to_id[tgt_lang]

    translated_chunks = []
    for chunk in _split_into_chunks(text):
        # truncation stays on as a safety net for a single over-long sentence.
        inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
        generated = model.generate(
            **inputs,
            forced_bos_token_id=target_token_id,
            num_beams=5,
            length_penalty=1.0,
            max_length=512,
        )
        decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
        translated_chunks.append(decoded.strip())
    return " ".join(translated_chunks)
69
+
70
def process_audio_for_sync(audio_path, target_speed=1.0):
    """Load an audio file and change its playback speed.

    The speed change is done by re-labelling the raw samples with a scaled
    frame rate and then resampling back to the original rate. This shortens
    or lengthens the clip but also shifts its pitch (faster means higher);
    it is not a pitch-preserving time stretch.

    Args:
        audio_path: Path of the audio file to load.
        target_speed: Playback-speed multiplier; ``1.0`` returns the audio
            unchanged. Must be greater than zero.

    Returns:
        The (possibly speed-adjusted) ``AudioSegment``.

    Raises:
        ValueError: If *target_speed* is zero or negative.
    """
    # Validate before touching the file: a non-positive multiplier would
    # produce a frame rate of zero or below.
    if target_speed <= 0:
        raise ValueError(f"target_speed must be positive, got {target_speed!r}")

    audio = AudioSegment.from_file(audio_path)
    if target_speed == 1.0:
        return audio

    # Same samples, declared at a scaled frame rate => plays faster/slower.
    respeeded = audio._spawn(
        audio.raw_data,
        overrides={"frame_rate": int(audio.frame_rate * target_speed)},
    )
    # Resample back so downstream consumers see the original frame rate.
    return respeeded.set_frame_rate(audio.frame_rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def main():
84
+ st.title("🎥 Tamil Video Dubbing AI")
85
+ st.markdown("### Advanced Video Translation and Dubbing System")
 
 
 
 
 
 
 
 
86
 
87
+ # Load models
88
+ try:
89
+ with st.spinner("Loading AI models..."):
90
+ whisper_model, tokenizer, translation_model = load_models()
91
+ st.success("Models loaded successfully! 🚀")
92
+ except Exception as e:
93
+ st.error(f"Error loading models: {e}")
94
  return
95
+
96
+ # File uploader with progress
97
+ video_file = st.file_uploader("Upload your video file", type=["mp4", "mov", "avi"])
98
 
99
+ if video_file:
100
+ # Video preview
101
+ st.video(video_file)
102
+
103
+ # Advanced settings
104
+ with st.expander("Advanced Settings"):
105
+ voice_speed = st.slider("Voice Speed", 0.5, 1.5, TAMIL_CONFIG['voice_speed'], 0.1)
106
+ quality_level = st.select_slider(
107
+ "Translation Quality",
108
+ options=["Draft", "Standard", "High Quality"],
109
+ value="Standard"
110
+ )
111
+
112
+ if st.button("Start Tamil Dubbing", key="start_dubbing"):
 
 
 
 
 
113
  try:
114
+ with st.spinner("Processing your video..."):
115
+ # Save uploaded video
116
+ temp_video_path = tempfile.mktemp(suffix='.mp4')
117
+ with open(temp_video_path, 'wb') as f:
118
+ f.write(video_file.read())
119
+
120
+ # Process steps with progress bar
121
+ progress_bar = st.progress(0)
122
+ status_text = st.empty()
123
+
124
+ # Extract audio
125
+ status_text.text("Extracting audio...")
126
+ video = VideoFileClip(temp_video_path)
127
+ audio_path = tempfile.mktemp(suffix=".wav")
128
+ video.audio.write_audiofile(audio_path, fps=TAMIL_CONFIG['sample_rate'])
129
+ progress_bar.progress(20)
130
+
131
+ # Transcribe
132
+ status_text.text("Transcribing audio...")
133
+ result = whisper_model.transcribe(audio_path, language=TAMIL_CONFIG['whisper_code'])
134
+ original_text = result["text"]
135
+ progress_bar.progress(40)
136
+
137
+ # Translate
138
+ status_text.text("Translating to Tamil...")
139
+ translated_text = translate_text(original_text, tokenizer, translation_model)
140
+ progress_bar.progress(60)
141
+
142
+ # Generate Tamil speech
143
+ status_text.text("Generating Tamil speech...")
144
+ tts = gTTS(text=translated_text, lang=TAMIL_CONFIG['gtts_code'])
145
+ translated_audio_path = tempfile.mktemp(suffix=".mp3")
146
+ tts.save(translated_audio_path)
147
+ progress_bar.progress(80)
148
+
149
+ # Final video creation
150
+ status_text.text("Creating final video...")
151
+ dubbed_audio = process_audio_for_sync(translated_audio_path, voice_speed)
152
+ final_audio_path = tempfile.mktemp(suffix=".wav")
153
+ dubbed_audio.export(final_audio_path, format="wav")
154
+
155
+ # Combine video with new audio
156
+ final_video_path = tempfile.mktemp(suffix=".mp4")
157
+ final_audio = AudioFileClip(final_audio_path)
158
+ final_video = video.set_audio(final_audio)
159
+ final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')
160
+ progress_bar.progress(100)
161
+
162
+ # Display results
163
+ st.success("Video dubbed successfully! 🎉")
164
+ st.video(final_video_path)
165
+
166
+ # Download options
167
+ col1, col2 = st.columns(2)
168
+ with col1:
169
+ with open(final_video_path, "rb") as f:
170
+ st.download_button(
171
+ "Download Dubbed Video",
172
+ f,
173
+ file_name="tamil_dubbed_video.mp4",
174
+ mime="video/mp4"
175
+ )
176
 
177
+ with col2:
178
+ st.download_button(
179
+ "Download Tamil Script",
180
+ translated_text,
181
+ file_name="tamil_script.txt",
182
+ mime="text/plain"
183
+ )
184
+
185
+ # Clean up
186
+ for path in [temp_video_path, audio_path, translated_audio_path,
187
+ final_audio_path, final_video_path]:
188
+ if os.path.exists(path):
189
+ os.remove(path)
190
+
191
  except Exception as e:
192
+ st.error(f"An error occurred: {e}")
193
+ st.info("Please try again with a different video or check your internet connection.")
194
+
195
  if __name__ == "__main__":
196
+ main()