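"""Replace a video's audio track with a synthesized voice-over.

Steps: transcribe the audio with a Whisper ASR pipeline, synthesize the
transcript with gTTS, then mux the narration back into the video with ffmpeg.

Assumed dependencies: transformers (with a torch backend), gtts, and an
ffmpeg binary on PATH.
"""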
from transformers import pipeline
from gtts import gTTS
import subprocess


def extract_text_from_audio(audio_path):
    # Whisper-based speech recognition; the pipeline decodes the input with
    # ffmpeg, so a video container such as .mp4 works directly as input.
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")

    # The pipeline returns a dict with the transcript under the "text" key.
    transcription = transcriber(audio_path)
    text = transcription["text"]

    # Keep a copy of the transcript on disk for later inspection.
    with open("video_text.txt", "w", encoding="utf-8") as f:
        f.write(text)

    return text


def generate_voice_over(text, output_audio_path="voice_over.mp3"):
    # Synthesize an English narration of the transcript with gTTS.
    tts = gTTS(text=text, lang="en")
    tts.save(output_audio_path)
    print(f"Voice-over saved as {output_audio_path}")
    return output_audio_path


def add_voice_over_to_video(video_path, audio_path, output_video_path="output_video_with_voice.mp4"):
    # Copy the video stream unchanged and take the audio from the generated
    # voice-over; -shortest stops writing when the shorter input ends.
    ffmpeg_command = [
        "ffmpeg",
        "-y",  # overwrite the output file if it already exists
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-map", "0:v:0",
        "-map", "1:a:0",
        "-shortest",
        output_video_path,
    ]
    # check=True raises CalledProcessError if ffmpeg exits with a failure.
    subprocess.run(ffmpeg_command, check=True)
    print(f"Final video with voice-over saved as {output_video_path}")


def main(video_path):
    # 1. Transcribe the original audio track.
    text = extract_text_from_audio(video_path)
    print("Extracted Text:", text)

    # 2. Generate a synthetic voice-over from the transcript.
    audio_path = generate_voice_over(text)

    # 3. Mux the voice-over back into the video.
    add_voice_over_to_video(video_path, audio_path)


if __name__ == "__main__":
    main("input_video.mp4")