"""Streamlit app: transcribe a video's audio with Whisper, translate the
transcript, synthesize speech with gTTS, and remux the translated audio
onto the original video for download."""

import streamlit as st
from moviepy.editor import VideoFileClip, AudioFileClip
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
import time

# Supported target languages (display name -> ISO 639-1 code).
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',
    # Add more languages as needed
}


def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
    """Transcribe the audio file at ``audio_path`` in fixed-size windows.

    Feeding Whisper bounded ``chunk_length``-second slices keeps memory
    flat on long recordings instead of decoding the whole file at once.

    Returns the concatenated transcript text.
    """
    audio = whisper.load_audio(audio_path)
    sample_rate = whisper.audio.SAMPLE_RATE
    duration = len(audio) / sample_rate  # total length in seconds
    texts = []
    for start in np.arange(0, duration, chunk_length):
        end = min(start + chunk_length, duration)
        segment = audio[int(start * sample_rate):int(end * sample_rate)]
        texts.append(model.transcribe(segment)['text'])
    return ' '.join(texts)


def split_text(text, max_length=500):
    """Split ``text`` on whitespace into chunks of at most ``max_length``
    characters (a single over-long word becomes its own chunk)."""
    chunks = []
    current = ""
    for word in text.split():
        if len(current) + len(word) + 1 <= max_length:
            current = f"{current} {word}" if current else word
        else:
            chunks.append(current)
            current = word
    if current:
        chunks.append(current)
    return chunks


def translate_in_chunks(text, translator, max_length=500):
    """Translate ``text`` piecewise; translation backends typically reject
    payloads longer than ~500 characters per request."""
    return ' '.join(translator.translate(chunk)
                    for chunk in split_text(text, max_length))


def _new_temp_path(suffix):
    """Create a closed, empty temp file and return its path.

    Replaces deprecated ``tempfile.mktemp`` (race-prone): ``mkstemp``
    actually creates the file; we close our handle because the media
    libraries write by path, not file descriptor.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


st.title("AI Video Translator with Whisper and GTTS")

# Load the Whisper model once at startup; abort the run on failure so the
# rest of the script never sees an undefined model.
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    st.stop()

# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file",
                              type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox(
        "Select the target language for translation", list(LANGUAGES.keys()))

    # Process when user clicks translate
    if st.button("Translate Video"):
        lang_code = LANGUAGES[target_language]

        # Persist the upload to disk: moviepy and whisper need a real path.
        with tempfile.NamedTemporaryFile(delete=False,
                                         suffix='.mp4') as temp_video:
            temp_video.write(video_file.read())
            temp_video_path = temp_video.name

        # Pre-declare every temp path so the finally-block cleanup can run
        # unconditionally, no matter how far processing got before failing.
        audio_path = None
        translated_audio_path = None
        final_video_path = None
        video = None
        try:
            # Extract the original audio track from the video.
            try:
                video = VideoFileClip(temp_video_path)
                audio_path = _new_temp_path(".wav")
                video.audio.write_audiofile(audio_path)
            except Exception as e:
                st.error(f"Error extracting audio from video: {e}")
                st.stop()  # finally-block still cleans up temp files

            try:
                # Transcribe audio using Whisper
                original_text = transcribe_audio_in_chunks(audio_path,
                                                           whisper_model)
                st.write("Original Transcription:", original_text)

                # Translate text to the target language
                translator = Translator(to_lang=lang_code)
                translated_text = translate_in_chunks(original_text,
                                                      translator)
                st.write(f"Translated Text ({target_language}):",
                         translated_text)

                # Convert translated text to speech
                tts = gTTS(text=translated_text, lang=lang_code)
                translated_audio_path = _new_temp_path(".mp3")
                tts.save(translated_audio_path)

                # Merge translated audio with the original video (reuse the
                # already-open clip instead of decoding the file again).
                final_video_path = _new_temp_path(".mp4")
                translated_audio = AudioFileClip(translated_audio_path)
                final_video = video.set_audio(translated_audio)
                final_video.write_videofile(final_video_path,
                                            codec='libx264',
                                            audio_codec='aac')

                # Display success message and provide download link
                st.success("Translation successful! "
                           "Download your translated video below:")
                st.video(final_video_path)
                with open(final_video_path, "rb") as f:
                    st.download_button("Download Translated Video", f,
                                      file_name="translated_video.mp4")
            except Exception as e:
                st.error(f"Error during transcription/translation: {e}")
        finally:
            # Release ffmpeg reader handles before deleting the backing
            # files (required on Windows), then remove whatever temp files
            # were actually created — even when an error clipped the flow.
            if video is not None:
                video.close()
            for path in (temp_video_path, audio_path,
                         translated_audio_path, final_video_path):
                if path and os.path.exists(path):
                    os.remove(path)