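"""Replace a video's audio track with a synthesized voice-over.

Steps: transcribe the audio with a Whisper ASR pipeline, synthesize the
transcript with gTTS, then mux the narration back into the video with ffmpeg.

Assumed dependencies: transformers (with a torch backend), gtts, and an
ffmpeg binary on PATH.
"""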
from transformers import pipeline
from gtts import gTTS
import subprocess


def extract_text_from_audio(audio_path):
    # Whisper-based speech recognition; the pipeline decodes the input with
    # ffmpeg, so a video container such as .mp4 works directly as input.
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")

    # The pipeline returns a dict with the transcript under the "text" key.
    transcription = transcriber(audio_path)
    text = transcription["text"]

    # Keep a copy of the transcript on disk for later inspection.
    with open("video_text.txt", "w", encoding="utf-8") as f:
        f.write(text)

    return text


def generate_voice_over(text, output_audio_path="voice_over.mp3"):
    # Synthesize an English narration of the transcript with gTTS.
    tts = gTTS(text=text, lang="en")
    tts.save(output_audio_path)
    print(f"Voice-over saved as {output_audio_path}")
    return output_audio_path


def add_voice_over_to_video(video_path, audio_path, output_video_path="output_video_with_voice.mp4"):
    # Copy the video stream unchanged and take the audio from the generated
    # voice-over; -shortest stops writing when the shorter input ends.
    ffmpeg_command = [
        "ffmpeg",
        "-y",  # overwrite the output file if it already exists
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-map", "0:v:0",
        "-map", "1:a:0",
        "-shortest",
        output_video_path,
    ]
    # check=True raises CalledProcessError if ffmpeg exits with a failure.
    subprocess.run(ffmpeg_command, check=True)
    print(f"Final video with voice-over saved as {output_video_path}")


def main(video_path):
    # 1. Transcribe the original audio track.
    text = extract_text_from_audio(video_path)
    print("Extracted Text:", text)

    # 2. Generate a synthetic voice-over from the transcript.
    audio_path = generate_voice_over(text)

    # 3. Mux the voice-over back into the video.
    add_voice_over_to_video(video_path, audio_path)


if __name__ == "__main__":
    main("input_video.mp4")