Spaces:

Artificial-superintelligence
/

Aita

Running

App Files Files Community

Artificial-superintelligence committed 14 days ago

Commit

3369106

•

1 Parent(s): c56ed60

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -278

app.py CHANGED Viewed

@@ -1,299 +1,196 @@
 import streamlit as st
-from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip
 import whisper
-from translate import Translator
 from gtts import gTTS
 import tempfile
 import os
 import numpy as np
-import shutil
-from pathlib import Path
-import time
-# Set page configuration
-st.set_page_config(
-    page_title="Tamil Movie Dubber",
-    page_icon="🎬",
-    layout="wide"
-)
-# Custom CSS
-st.markdown("""
-    <style>
-    .stButton>button {
-        width: 100%;
-        border-radius: 5px;
-        height: 3em;
-        background-color: #FF4B4B;
-        color: white;
-    }
-    .stProgress .st-bo {
-        background-color: #FF4B4B;
-    }
-    </style>
-    """, unsafe_allow_html=True)
-# Tamil voice configurations
-TAMIL_VOICES = {
-    'Female 1': {'name': 'ta-IN-PallaviNeural', 'style': 'normal'},
-    'Female 2': {'name': 'ta-IN-PallaviNeural', 'style': 'formal'},
-    'Male 1': {'name': 'ta-IN-ValluvarNeural', 'style': 'normal'},
-    'Male 2': {'name': 'ta-IN-ValluvarNeural', 'style': 'formal'}
-}
-class TamilTextProcessor:
-    @staticmethod
-    def normalize_tamil_text(text):
-        """Normalize Tamil text for better pronunciation"""
-        tamil_numerals = {'௦': '0', '௧': '1', '௨': '2', '௩': '3', '௪': '4',
-                         '௫': '5', '௬': '6', '௭': '7', '௮': '8', '௯': '9'}
-        for tamil_num, eng_num in tamil_numerals.items():
-            text = text.replace(tamil_num, eng_num)
-        return text
-    @staticmethod
-    def process_for_tts(text):
-        """Process Tamil text for TTS"""
-        text = ''.join(char for char in text if ord(char) < 65535)
-        text = ' '.join(text.split())
-        return text
 @st.cache_resource
-def load_whisper_model():
-    """Load Whisper model with caching"""
-    return whisper.load_model("base")
-class VideoProcessor:
-    def __init__(self):
-        self.temp_dir = Path(tempfile.mkdtemp())
-        self.whisper_model = load_whisper_model()
-    def create_temp_path(self, suffix):
-        """Create a temporary file path"""
-        return str(self.temp_dir / f"temp_{os.urandom(4).hex()}{suffix}")
-    def cleanup(self):
-        """Clean up temporary directory"""
-        try:
-            shutil.rmtree(self.temp_dir)
-        except Exception as e:
-            st.warning(f"Cleanup warning: {e}")
-    def transcribe_video(self, video_path):
-        """Transcribe video audio using Whisper"""
-        try:
-            with VideoFileClip(video_path) as video:
-                # Extract audio to temporary file
-                audio_path = self.create_temp_path(".wav")
-                video.audio.write_audiofile(audio_path, fps=16000, verbose=False, logger=None)
-                # Check if audio file is not empty
-                if os.path.getsize(audio_path) == 0:
-                    raise ValueError("Extracted audio file is empty")
-                # Transcribe using Whisper
-                result = self.whisper_model.transcribe(audio_path)
-                return result["segments"], video.duration
-        except Exception as e:
-            raise Exception(f"Transcription error: {str(e)}")
-    def translate_segments(self, segments):
-        """Translate segments to Tamil"""
-        translator = Translator(to_lang='ta')
-        translated_segments = []
-        for segment in segments:
-            try:
-                translated_text = translator.translate(segment["text"])
-                translated_text = TamilTextProcessor.normalize_tamil_text(translated_text)
-                translated_text = TamilTextProcessor.process_for_tts(translated_text)
-                translated_segments.append({
-                    "text": translated_text,
-                    "start": segment["start"],
-                    "end": segment["end"],
-                    "duration": segment["end"] - segment["start"]
-                })
-            except Exception as e:
-                st.warning(f"Translation warning for segment: {str(e)}")
-                # Keep original text if translation fails
-                translated_segments.append({
-                    "text": segment["text"],
-                    "start": segment["start"],
-                    "end": segment["end"],
-                    "duration": segment["end"] - segment["start"]
-                })
-        return translated_segments
-    def generate_tamil_audio(self, text):
-        """Generate Tamil audio using gTTS with rate limiting"""
-        try:
-            audio_path = self.create_temp_path(".mp3")
-            tts = gTTS(text=text, lang='ta', slow=False)
-            tts.save(audio_path)
-            time.sleep(1)  # Adding delay to avoid rate limit issues
-            return audio_path
-        except Exception as e:
-            raise Exception(f"Audio generation error: {str(e)}")
-    def create_subtitle_clip(self, txt, fontsize, color, size):
-        """Create a subtitle clip"""
-        return TextClip(
-            txt=txt,
-            fontsize=fontsize,
-            color=color,
-            bg_color='rgba(0,0,0,0.5)',
-            size=size,
-            method='caption'
-        )
-def process_video(video_data, voice_type, generate_subtitles=True, subtitle_size=24, subtitle_color='white'):
-    """Main video processing function"""
-    processor = VideoProcessor()
-    try:
-        # Save uploaded video to temporary file
-        input_path = processor.create_temp_path(".mp4")
-        with open(input_path, "wb") as f:
-            f.write(video_data)
-        # Load video
-        video = VideoFileClip(input_path)
-        # Create progress tracking
-        progress_text = st.empty()
-        progress_bar = st.progress(0)
-        # Step 1: Transcribe
-        progress_text.text("Transcribing video...")
-        segments, duration = processor.transcribe_video(input_path)
-        progress_bar.progress(0.25)
-        # Step 2: Translate
-        progress_text.text("Translating to Tamil...")
-        translated_segments = processor.translate_segments(segments)
-        progress_bar.progress(0.50)
-        # Step 3: Generate audio
-        progress_text.text("Generating Tamil audio...")
-        subtitle_clips = []
-        audio_clips = []
-        for i, segment in enumerate(translated_segments):
-            # Generate audio
-            audio_path = processor.generate_tamil_audio(segment["text"])
-            audio_clip = AudioFileClip(audio_path)
-            audio_clips.append(audio_clip.set_start(segment["start"]))
-            # Create subtitle if enabled
-            if generate_subtitles:
-                subtitle_clip = processor.create_subtitle_clip(
-                    segment["text"],
-                    subtitle_size,
-                    subtitle_color,
-                    (video.w, None)
-                )
-                subtitle_clip = (subtitle_clip
-                               .set_position(('center', 'bottom'))
-                               .set_start(segment["start"])
-                               .set_duration(segment["duration"]))
-                subtitle_clips.append(subtitle_clip)
-            progress_bar.progress(0.50 + (0.4 * (i + 1) / len(translated_segments)))
-        # Step 4: Combine everything
-        progress_text.text("Creating final video...")
-        # Combine audio clips
-        final_audio = concatenate_audioclips(audio_clips)
-        # Create final video
-        if generate_subtitles:
-            final_video = CompositeVideoClip([video, *subtitle_clips])
-        else:
-            final_video = video
-        # Set audio
-        final_video = final_video.set_audio(final_audio)
-        # Write final video
-        output_path = processor.create_temp_path(".mp4")
-        final_video.write_videofile(
-            output_path,
-            codec='libx264',
-            audio_codec='aac',
-            temp_audiofile=processor.create_temp_path(".m4a"),
-            remove_temp=True,
-            verbose=False,
-            logger=None
-        )
-        progress_bar.progress(1.0)
-        progress_text.text("Processing complete!")
-        return output_path
-    except Exception as e:
-        raise Exception(f"Video processing error: {str(e)}")
-    finally:
-        # Cleanup
-        processor.cleanup()
 def main():
-    st.title("Tamil Movie Dubbing System")
-    st.markdown("""
-    👋 Welcome! This tool helps you:
-    - 🎥 Convert English videos to Tamil
-    - 🗣️ Generate Tamil voiceovers
-    - 📝 Add Tamil subtitles
-    """)
-    # File uploader
-    video_file = st.file_uploader("Upload Video File", type=['mp4', 'mov', 'avi'])
-    if not video_file:
-        st.warning("Please upload a video to begin.")
         return
-    # Settings
-    col1, col2 = st.columns(2)
-    with col1:
-        voice_type = st.selectbox("Select Voice", list(TAMIL_VOICES.keys()))
-    with col2:
-        generate_subtitles = st.checkbox("Generate Subtitles", value=True)
-    if generate_subtitles:
-        col3, col4 = st.columns(2)
-        with col3:
-            subtitle_size = st.slider("Subtitle Size", 16, 32, 24)
-        with col4:
-            subtitle_color = st.color_picker("Subtitle Color", "#FFFFFF")
-    # Process video
-    if st.button("Process Video"):
-        with st.spinner("Processing video..."):
             try:
-                output_video_path = process_video(
-                    video_file.read(),
-                    voice_type,
-                    generate_subtitles,
-                    subtitle_size,
-                    subtitle_color
-                )
-                st.video(output_video_path)
-                st.success("Video processed successfully!")
-                with open(output_video_path, "rb") as f:
-                    st.download_button("Download Processed Video", f, file_name="processed_video.mp4")
             except Exception as e:
-                st.error(f"Error: {str(e)}")
 if __name__ == "__main__":
-    main()

 import streamlit as st
+from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
 import whisper
+from transformers import MBartForConditionalGeneration, MBartTokenizer
 from gtts import gTTS
+import torch
 import tempfile
 import os
 import numpy as np
+from pydub import AudioSegment
+import librosa
+import warnings
+warnings.filterwarnings('ignore')
+# Initialize models and configs
 @st.cache_resource
+def load_models():
+    """Load the speech-recognition and translation models.
+
+    Decorated with ``st.cache_resource`` so the loaded objects are reused
+    across Streamlit reruns instead of being reloaded on every interaction.
+
+    Returns:
+        tuple: ``(whisper_model, tokenizer, model)`` - the Whisper "large"
+        model plus the tokenizer and seq2seq model of the
+        ``facebook/mbart-large-50-many-to-many-mmt`` checkpoint.
+    """
+    # The checkpoint is an mBART-50 model. The 25-language ``MBartTokenizer``
+    # has no ``ta_IN`` language code, so the Tamil lookup in translate_text
+    # fails with it; the mBART-50 tokenizer carries all 50 language codes.
+    # Imported locally so this function does not depend on the import header.
+    from transformers import MBart50Tokenizer
+
+    checkpoint = "facebook/mbart-large-50-many-to-many-mmt"
+    whisper_model = whisper.load_model("large")
+    tokenizer = MBart50Tokenizer.from_pretrained(checkpoint)
+    model = MBartForConditionalGeneration.from_pretrained(checkpoint)
+    return whisper_model, tokenizer, model
+# Tamil language configuration
+# Language identifiers and audio settings for the dubbing pipeline. Notes on
+# usage below refer to the code visible in this file.
+TAMIL_CONFIG = {
+    # Not read by any visible code.
+    'code': 'ta',
+    # Passed as the `language` argument of whisper_model.transcribe in main().
+    'whisper_code': 'tamil',
+    # Not read by any visible code; translate_text spells out the same
+    # "ta_IN" code itself.
+    'mbart_code': 'ta_IN',
+    # Passed as `lang` to gTTS in main().
+    'gtts_code': 'ta',
+    # Default position of the "Voice Speed" slider in main().
+    'voice_speed': 1.1,  # Adjust speed for better sync
+    # `fps` used when the source audio is written to a WAV file in main().
+    'sample_rate': 22050
+}
+# Streamlit UI setup
+# Sets the browser tab title, the page icon and the wide page layout.
+st.set_page_config(page_title="Tamil Video Dubbing AI", page_icon="🎥", layout="wide")
+def create_custom_style():
+    """Inject the app's custom CSS into the page.
+
+    The stylesheet sets the app background colour, pads the main container
+    and restyles buttons; it is emitted through ``st.markdown`` with raw HTML
+    enabled.
+    """
+    stylesheet = """
+        <style>
+        .stApp {
+            background-color: #f5f5f5;
+        }
+        .main {
+            padding: 2rem;
+        }
+        .stButton>button {
+            background-color: #FF4B4B;
+            color: white;
+            font-weight: bold;
+        }
+        </style>
+    """
+    st.markdown(stylesheet, unsafe_allow_html=True)
+create_custom_style()
+def _split_for_translation(text, max_chars=400):
+    """Greedily pack sentences into chunks of at most ``max_chars`` characters.
+
+    A single sentence longer than ``max_chars`` is kept whole as its own chunk.
+    Returns an empty list for empty or whitespace-only text.
+    """
+    import re
+
+    sentences = [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]
+    chunks = []
+    current = ""
+    for sentence in sentences:
+        candidate = f"{current} {sentence}" if current else sentence
+        if current and len(candidate) > max_chars:
+            chunks.append(current)
+            current = sentence
+        else:
+            current = candidate
+    if current:
+        chunks.append(current)
+    return chunks
+
+
+def translate_text(text, tokenizer, model, tgt_lang="ta_IN"):
+    """Translate ``text`` with an mBART model, Tamil by default.
+
+    Long input is translated chunk by chunk so that transcripts longer than
+    the 512-token input limit are not silently cut off; only a single
+    sentence that alone exceeds the limit is still truncated by the tokenizer.
+
+    Args:
+        text: Source text; ``None`` or blank text yields ``""``.
+        tokenizer: mBART tokenizer exposing ``lang_code_to_id``.
+        model: mBART generation model matching ``tokenizer``.
+        tgt_lang: mBART language code forced as the first generated token.
+
+    Returns:
+        str: The translated chunks joined with single spaces.
+
+    Raises:
+        KeyError: If ``tgt_lang`` is not a language code of ``tokenizer``.
+    """
+    chunks = _split_for_translation(text or "")
+    if not chunks:
+        # Nothing to translate; do not ask the model to generate from nothing.
+        return ""
+    target_token_id = tokenizer.lang_code_to_id[tgt_lang]
+    translated_chunks = []
+    for chunk in chunks:
+        inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
+        # Plain beam search: sampling would make the same transcript produce
+        # a different translation on every run.
+        translated_tokens = model.generate(
+            **inputs,
+            forced_bos_token_id=target_token_id,
+            num_beams=5,
+            length_penalty=1.0,
+            max_length=512,
+            min_length=0,
+            do_sample=False
+        )
+        translated_chunks.append(
+            tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+        )
+    return " ".join(translated_chunks)
+def process_audio_for_sync(audio_path, target_speed=1.0):
+    """Load an audio file and optionally change its playback speed.
+
+    The speed change reinterprets the raw samples at a scaled frame rate and
+    then converts back to the original frame rate. This changes tempo and
+    pitch together (faster playback also sounds higher); it is not a
+    pitch-preserving time stretch.
+
+    Args:
+        audio_path: Path of the audio file to load.
+        target_speed: Playback speed factor; ``1.0`` leaves the audio as is.
+
+    Returns:
+        The loaded ``AudioSegment``, speed-adjusted when requested.
+
+    Raises:
+        ValueError: If ``target_speed`` is not positive.
+    """
+    if target_speed <= 0:
+        # A zero or negative factor would produce an invalid frame rate.
+        raise ValueError(f"target_speed must be positive, got {target_speed}")
+    audio = AudioSegment.from_file(audio_path)
+    if target_speed == 1.0:
+        return audio
+    original_rate = audio.frame_rate
+    # Same samples, declared at a different rate -> plays faster or slower.
+    retimed = audio._spawn(audio.raw_data, overrides={
+        "frame_rate": int(original_rate * target_speed)
+    })
+    # Convert back so the result keeps the original frame rate.
+    return retimed.set_frame_rate(original_rate)
 def main():
+    st.title("🎥 Tamil Video Dubbing AI")
+    st.markdown("### Advanced Video Translation and Dubbing System")
+    # Load models
+    try:
+        with st.spinner("Loading AI models..."):
+            whisper_model, tokenizer, translation_model = load_models()
+        st.success("Models loaded successfully! 🚀")
+    except Exception as e:
+        st.error(f"Error loading models: {e}")
         return
+    # File uploader with progress
+    video_file = st.file_uploader("Upload your video file", type=["mp4", "mov", "avi"])
+    if video_file:
+        # Video preview
+        st.video(video_file)
+        # Advanced settings
+        with st.expander("Advanced Settings"):
+            voice_speed = st.slider("Voice Speed", 0.5, 1.5, TAMIL_CONFIG['voice_speed'], 0.1)
+            quality_level = st.select_slider(
+                "Translation Quality",
+                options=["Draft", "Standard", "High Quality"],
+                value="Standard"
+            )
+        if st.button("Start Tamil Dubbing", key="start_dubbing"):
             try:
+                with st.spinner("Processing your video..."):
+                    # Save uploaded video
+                    temp_video_path = tempfile.mktemp(suffix='.mp4')
+                    with open(temp_video_path, 'wb') as f:
+                        f.write(video_file.read())
+                    # Process steps with progress bar
+                    progress_bar = st.progress(0)
+                    status_text = st.empty()
+                    # Extract audio
+                    status_text.text("Extracting audio...")
+                    video = VideoFileClip(temp_video_path)
+                    audio_path = tempfile.mktemp(suffix=".wav")
+                    video.audio.write_audiofile(audio_path, fps=TAMIL_CONFIG['sample_rate'])
+                    progress_bar.progress(20)
+                    # Transcribe
+                    status_text.text("Transcribing audio...")
+                    result = whisper_model.transcribe(audio_path, language=TAMIL_CONFIG['whisper_code'])
+                    original_text = result["text"]
+                    progress_bar.progress(40)
+                    # Translate
+                    status_text.text("Translating to Tamil...")
+                    translated_text = translate_text(original_text, tokenizer, translation_model)
+                    progress_bar.progress(60)
+                    # Generate Tamil speech
+                    status_text.text("Generating Tamil speech...")
+                    tts = gTTS(text=translated_text, lang=TAMIL_CONFIG['gtts_code'])
+                    translated_audio_path = tempfile.mktemp(suffix=".mp3")
+                    tts.save(translated_audio_path)
+                    progress_bar.progress(80)
+                    # Final video creation
+                    status_text.text("Creating final video...")
+                    dubbed_audio = process_audio_for_sync(translated_audio_path, voice_speed)
+                    final_audio_path = tempfile.mktemp(suffix=".wav")
+                    dubbed_audio.export(final_audio_path, format="wav")
+                    # Combine video with new audio
+                    final_video_path = tempfile.mktemp(suffix=".mp4")
+                    final_audio = AudioFileClip(final_audio_path)
+                    final_video = video.set_audio(final_audio)
+                    final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')
+                    progress_bar.progress(100)
+                    # Display results
+                    st.success("Video dubbed successfully! 🎉")
+                    st.video(final_video_path)
+                    # Download options
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        with open(final_video_path, "rb") as f:
+                            st.download_button(
+                                "Download Dubbed Video",
+                                f,
+                                file_name="tamil_dubbed_video.mp4",
+                                mime="video/mp4"
+                            )
+                    with col2:
+                        st.download_button(
+                            "Download Tamil Script",
+                            translated_text,
+                            file_name="tamil_script.txt",
+                            mime="text/plain"
+                        )
+                    # Clean up
+                    for path in [temp_video_path, audio_path, translated_audio_path,
+                               final_audio_path, final_video_path]:
+                        if os.path.exists(path):
+                            os.remove(path)
             except Exception as e:
+                st.error(f"An error occurred: {e}")
+                st.info("Please try again with a different video or check your internet connection.")
 if __name__ == "__main__":
+    main()