# Install the Python dependencies if they are not already in your environment:
#   pip install transformers torch gtts
# The ffmpeg binary must also be installed and available on your PATH.

from transformers import pipeline
from gtts import gTTS
import subprocess


# Step 1: Extract text from the audio using Hugging Face Transformers
def extract_text_from_audio(audio_path):
    # Load the ASR pipeline from Hugging Face with a Whisper model.
    # chunk_length_s lets the pipeline handle audio longer than 30 seconds.
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
        chunk_length_s=30,
    )

    # Transcribe the audio file
    transcription = transcriber(audio_path)
    text = transcription["text"]

    # Save the transcribed text to a file (optional)
    with open("video_text.txt", "w", encoding="utf-8") as f:
        f.write(text)

    return text


# Step 2: Generate a voice-over using gTTS
def generate_voice_over(text, output_audio_path="voice_over.mp3"):
    # Synthesize the text to speech with gTTS
    tts = gTTS(text=text, lang="en")
    tts.save(output_audio_path)
    print(f"Voice-over saved as {output_audio_path}")
    return output_audio_path


# Step 3: Combine the voice-over with the original video using FFmpeg
def add_voice_over_to_video(video_path, audio_path, output_video_path="output_video_with_voice.mp4"):
    # Copy the video stream unchanged and replace the audio stream
    ffmpeg_command = [
        "ffmpeg",
        "-y",                      # overwrite the output file if it exists
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",            # do not re-encode the video stream
        "-map", "0:v:0",           # take video from the first input
        "-map", "1:a:0",           # take audio from the second input
        "-shortest",               # stop at the shorter of the two streams
        output_video_path,
    ]
    subprocess.run(ffmpeg_command, check=True)
    print(f"Final video with voice-over saved as {output_video_path}")


# Run the complete process
def main(video_path):
    # Step 1: Extract text from the video's audio track
    text = extract_text_from_audio(video_path)
    print("Extracted Text:", text)

    # Step 2: Generate a voice-over from the extracted text
    audio_path = generate_voice_over(text)

    # Step 3: Add the voice-over to the video
    add_voice_over_to_video(video_path, audio_path)


if __name__ == "__main__":
    # Provide the path to your input video file
    main("input_video.mp4")
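

# Note: depending on the container format, decoding audio directly from a video
# file can fail when Transformers pipes it through ffmpeg (some MP4s, for
# example, cannot be decoded from a pipe). A minimal sketch of a workaround:
# extract the audio track to a 16 kHz mono WAV first and transcribe that
# instead. The helper name extract_audio_track is an assumption for
# illustration, not part of the original script.
def extract_audio_track(video_path, output_audio_path="extracted_audio.wav"):
    # Use ffmpeg to pull out just the audio stream from the video
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", video_path,
            "-vn",                 # drop the video stream
            "-ac", "1",            # downmix to mono
            "-ar", "16000",        # 16 kHz, the sample rate Whisper expects
            output_audio_path,
        ],
        check=True,
    )
    return output_audio_path

# Possible usage inside main(), replacing the direct call in Step 1:
#   text = extract_text_from_audio(extract_audio_track(video_path))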