"""Streamlit app that dubs an uploaded video into Tamil.

Pipeline: extract the audio track (moviepy) -> transcribe and translate the
speech to English (Whisper) -> translate English to Tamil (mBART-50) ->
synthesize Tamil speech (gTTS) -> adjust playback speed (pydub) -> mux the new
audio back onto the original video (moviepy).
"""
import os
import re
import tempfile
import textwrap
import warnings

# NOTE: librosa, numpy, concatenate_audioclips and MBartTokenizer are not used
# below; they are kept because other code may rely on them being importable
# from this module.
import librosa
import numpy as np
import streamlit as st
import torch
import whisper
from gtts import gTTS
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
from pydub import AudioSegment
from transformers import (
    MBart50TokenizerFast,
    MBartForConditionalGeneration,
    MBartTokenizer,
)

warnings.filterwarnings('ignore')

MBART_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"

# Whisper is run with task="translate", which always yields English text, so
# English is the source language handed to mBART.
SOURCE_MBART_CODE = "en_XX"

# Beam width used for each "Translation Quality" choice in the UI.
QUALITY_BEAMS = {"Draft": 1, "Standard": 5, "High Quality": 8}

# Upper bound (characters) for one translation chunk; keeps every chunk well
# inside the 512-token limit passed to the tokenizer.
_MAX_CHUNK_CHARS = 400
_TRANSLATION_BATCH_SIZE = 8
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


# Initialize models and configs
@st.cache_resource
def load_models():
    """Load and cache the Whisper model and the mBART-50 tokenizer/model.

    Returns:
        Tuple ``(whisper_model, tokenizer, model)``.
    """
    whisper_model = whisper.load_model("large")
    # The mBART-50 checkpoint needs the mBART-50 tokenizer: the original
    # MBartTokenizer only knows the 25 cc25 language codes (no "ta_IN").
    tokenizer = MBart50TokenizerFast.from_pretrained(
        MBART_CHECKPOINT, src_lang=SOURCE_MBART_CODE
    )
    model = MBartForConditionalGeneration.from_pretrained(MBART_CHECKPOINT)
    return whisper_model, tokenizer, model


# Tamil language configuration
TAMIL_CONFIG = {
    'code': 'ta',
    'whisper_code': 'tamil',
    'mbart_code': 'ta_IN',
    'gtts_code': 'ta',
    'voice_speed': 1.1,  # Adjust speed for better sync
    'sample_rate': 22050
}

# Streamlit UI setup
st.set_page_config(page_title="Tamil Video Dubbing AI", page_icon="🎥", layout="wide")


def create_custom_style():
    """Inject the page's custom markup (currently an empty placeholder)."""
    st.markdown(""" """, unsafe_allow_html=True)


create_custom_style()


def _split_into_chunks(text, max_chars=_MAX_CHUNK_CHARS):
    """Split *text* into sentence-aligned chunks of at most *max_chars* characters."""
    chunks = []
    current = ""
    for sentence in _SENTENCE_BOUNDARY.split(text or ""):
        sentence = sentence.strip()
        if not sentence:
            continue
        # A single run-on sentence longer than the limit is broken at word
        # boundaries so that nothing is silently truncated later.
        if len(sentence) > max_chars:
            pieces = textwrap.wrap(sentence, max_chars)
        else:
            pieces = [sentence]
        for piece in pieces:
            if current and len(current) + 1 + len(piece) > max_chars:
                chunks.append(current)
                current = piece
            else:
                current = f"{current} {piece}" if current else piece
    if current:
        chunks.append(current)
    return chunks


def translate_text(text, tokenizer, model, num_beams=5):
    """Translate English *text* to Tamil with mBART-50.

    The text is split into sentence-aligned chunks that are translated in
    small batches, so long transcripts are not cut off at the 512-token
    limit. Decoding uses plain beam search (no sampling) so the same input
    always produces the same translation.

    Args:
        text: English source text.
        tokenizer: mBART-50 tokenizer.
        model: mBART-50 conditional-generation model.
        num_beams: Beam width for generation.

    Returns:
        The Tamil translation, or an empty string if *text* is empty.
    """
    chunks = _split_into_chunks(text)
    if not chunks:
        return ""

    tokenizer.src_lang = SOURCE_MBART_CODE
    target_lang_id = tokenizer.convert_tokens_to_ids(TAMIL_CONFIG['mbart_code'])

    translated_parts = []
    for start in range(0, len(chunks), _TRANSLATION_BATCH_SIZE):
        batch = chunks[start:start + _TRANSLATION_BATCH_SIZE]
        inputs = tokenizer(
            batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        translated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=target_lang_id,
            num_beams=num_beams,
            length_penalty=1.0,
            max_length=512,
            min_length=0,
        )
        translated_parts.extend(
            tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
        )
    return " ".join(part.strip() for part in translated_parts if part.strip())


def process_audio_for_sync(audio_path, target_speed=1.0):
    """Load an audio file and change its playback speed.

    The speed change is done by reinterpreting the raw samples at a scaled
    frame rate and resampling back to the original rate. This is cheap but
    shifts the pitch together with the tempo.

    Args:
        audio_path: Path of the audio file to load.
        target_speed: Speed multiplier; 1.0 leaves the audio untouched.

    Returns:
        The (possibly speed-adjusted) ``AudioSegment``.

    Raises:
        ValueError: If *target_speed* is not positive.
    """
    if target_speed <= 0:
        raise ValueError("target_speed must be positive")

    audio = AudioSegment.from_file(audio_path)
    if target_speed == 1.0:
        return audio

    retimed = audio._spawn(
        audio.raw_data,
        overrides={"frame_rate": int(audio.frame_rate * target_speed)},
    )
    return retimed.set_frame_rate(audio.frame_rate)


def _dub_video(video_file, models, voice_speed, num_beams, progress_bar, status_text):
    """Run the dubbing pipeline and return ``(video_bytes, tamil_text)``.

    All intermediate files live in a temporary directory that is removed when
    the function returns, whether or not a step fails.
    """
    whisper_model, tokenizer, translation_model = models
    upload_name = getattr(video_file, "name", "") or ""
    suffix = os.path.splitext(upload_name)[1] or ".mp4"

    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, f"source{suffix}")
        audio_path = os.path.join(workdir, "original.wav")
        tts_path = os.path.join(workdir, "tamil.mp3")
        final_audio_path = os.path.join(workdir, "tamil.wav")
        final_video_path = os.path.join(workdir, "dubbed.mp4")

        # Save uploaded video. getvalue() returns the full upload regardless
        # of the current stream position.
        with open(source_path, 'wb') as f:
            f.write(video_file.getvalue())

        video = VideoFileClip(source_path)
        dubbed_clip = None
        try:
            # Extract audio
            status_text.text("Extracting audio...")
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track to dub.")
            video.audio.write_audiofile(audio_path, fps=TAMIL_CONFIG['sample_rate'])
            progress_bar.progress(20)

            # Transcribe. task="translate" lets Whisper detect the spoken
            # language and emit English text for the translation step.
            status_text.text("Transcribing audio...")
            result = whisper_model.transcribe(audio_path, task="translate")
            original_text = result["text"].strip()
            if not original_text:
                raise ValueError("No speech was detected in the video.")
            progress_bar.progress(40)

            # Translate
            status_text.text("Translating to Tamil...")
            translated_text = translate_text(
                original_text, tokenizer, translation_model, num_beams=num_beams
            )
            if not translated_text:
                raise ValueError("The translation step produced no text.")
            progress_bar.progress(60)

            # Generate Tamil speech
            status_text.text("Generating Tamil speech...")
            tts = gTTS(text=translated_text, lang=TAMIL_CONFIG['gtts_code'])
            tts.save(tts_path)
            progress_bar.progress(80)

            # Final video creation
            status_text.text("Creating final video...")
            dubbed_audio = process_audio_for_sync(tts_path, voice_speed)
            dubbed_audio.export(final_audio_path, format="wav")

            # Combine video with new audio
            dubbed_clip = AudioFileClip(final_audio_path)
            final_video = video.set_audio(dubbed_clip)
            final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

            # Read the result into memory before the work directory is removed.
            with open(final_video_path, "rb") as f:
                video_bytes = f.read()
            progress_bar.progress(100)
        finally:
            # Release the ffmpeg readers so the temporary files can be deleted.
            if dubbed_clip is not None:
                dubbed_clip.close()
            video.close()

    return video_bytes, translated_text


def main():
    """Render the Streamlit UI and run the dubbing pipeline on demand."""
    st.title("🎥 Tamil Video Dubbing AI")
    st.markdown("### Advanced Video Translation and Dubbing System")

    # Load models
    try:
        with st.spinner("Loading AI models..."):
            models = load_models()
        st.success("Models loaded successfully! 🚀")
    except Exception as e:
        st.error(f"Error loading models: {e}")
        return

    # File uploader with progress
    video_file = st.file_uploader("Upload your video file", type=["mp4", "mov", "avi"])
    if not video_file:
        return

    # Video preview
    st.video(video_file)

    # Advanced settings
    with st.expander("Advanced Settings"):
        voice_speed = st.slider("Voice Speed", 0.5, 1.5, TAMIL_CONFIG['voice_speed'], 0.1)
        quality_level = st.select_slider(
            "Translation Quality",
            options=["Draft", "Standard", "High Quality"],
            value="Standard"
        )

    if not st.button("Start Tamil Dubbing", key="start_dubbing"):
        return

    # Top-level UI boundary: any pipeline failure is reported to the user.
    try:
        with st.spinner("Processing your video..."):
            # Process steps with progress bar
            progress_bar = st.progress(0)
            status_text = st.empty()
            video_bytes, translated_text = _dub_video(
                video_file,
                models,
                voice_speed,
                QUALITY_BEAMS[quality_level],
                progress_bar,
                status_text,
            )

            # Display results
            st.success("Video dubbed successfully! 🎉")
            st.video(video_bytes)

            # Download options
            col1, col2 = st.columns(2)
            with col1:
                st.download_button(
                    "Download Dubbed Video",
                    video_bytes,
                    file_name="tamil_dubbed_video.mp4",
                    mime="video/mp4"
                )
            with col2:
                st.download_button(
                    "Download Tamil Script",
                    translated_text,
                    file_name="tamil_script.txt",
                    mime="text/plain"
                )
    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.info("Please try again with a different video or check your internet connection.")


if __name__ == "__main__":
    main()