# whisper-demo-es-medium / videocreator.py
# Last commit: "Add gradio import" (d5ba4e3) by juancopi81 -- file size 1.81 kB.
import gradio as gr
from typing import Dict
from moviepy.editor import VideoFileClip, concatenate_videoclips
class VideoCreator:
    """Build a narrated video out of a collection of scenes.

    For every scene, the "Summary" text is synthesized to an audio file with
    ``tts_pipeline``, the "Illustration" text is rendered to an image with
    ``image_pipeline``, and ``gr.make_waveform`` combines both into a clip.
    The per-scene clips are finally concatenated with moviepy and written to
    ``final_video.mp4``.
    """

    def __init__(self,
                 tts_pipeline,
                 image_pipeline) -> None:
        """Store the pipelines used to produce audio and images.

        Args:
            tts_pipeline: Object exposing
                ``tts_to_file(text=..., file_path=...)``.
            image_pipeline: Callable that takes a text description and
                returns an object whose ``images`` sequence holds items
                with a ``save(path)`` method.
        """
        self.tts_pipeline = tts_pipeline
        self.image_pipeline = image_pipeline

    def create_video(self, scenes: Dict) -> str:
        """Create one clip per scene and merge them into a single video.

        Args:
            scenes: Mapping of scene identifier to scene data. Each scene
                must provide the "Summary" and "Illustration" entries.
                Clips are merged in the mapping's iteration order.

        Returns:
            Path of the merged video file.

        Raises:
            ValueError: If ``scenes`` is empty.
        """
        # Key the clips by position rather than by scene identifier:
        # _merge_videos looks them up as 0..n-1, so arbitrary scene keys
        # (e.g. scene names) would otherwise fail with a KeyError.
        videos_dict = {
            index: self._create_video_from_scene(scene)
            for index, scene in enumerate(scenes.values())
        }
        return self._merge_videos(videos_dict)

    def _create_video_from_scene(self, scene: Dict) -> str:
        """Produce the clip for a single scene.

        Args:
            scene: Scene data with a "Summary" (voice-over text) and an
                "Illustration" (image description) entry.

        Returns:
            The value returned by ``gr.make_waveform``, which
            ``_merge_videos`` later opens as a video file.
        """
        audio_file = self._get_audio_from_text(scene["Summary"])
        bg_image = self._get_bg_image_from_description(scene["Illustration"])
        video = gr.make_waveform(audio=audio_file,
                                 bg_image=bg_image)
        return video

    def _get_audio_from_text(self, voice_over: str) -> str:
        """Synthesize ``voice_over`` to ``output.wav`` and return that path.

        The same file is overwritten on every call, so the audio must be
        consumed before the next scene is processed.
        """
        self.tts_pipeline.tts_to_file(text=voice_over,
                                      file_path="output.wav")
        return "output.wav"

    def _get_bg_image_from_description(self, img_desc: str) -> str:
        """Render ``img_desc`` to ``img.png`` and return that path.

        Only the first image produced by the pipeline is kept. The same file
        is overwritten on every call.
        """
        images = self.image_pipeline(img_desc)
        print("Image generated!")
        image_output = images.images[0]
        image_output.save("img.png")
        return "img.png"

    def _merge_videos(self, videos_dict: Dict) -> str:
        """Concatenate the scene clips and write ``final_video.mp4``.

        Args:
            videos_dict: Mapping of position (``0 .. n-1``) to the video
                file of the scene at that position.

        Returns:
            Path of the written video file.

        Raises:
            ValueError: If ``videos_dict`` is empty.
        """
        if not videos_dict:
            raise ValueError("Cannot merge videos: no scenes were provided.")

        videos_to_concatenate = []
        try:
            # Positional lookup (not plain dict iteration) fixes the clip
            # order to 0..n-1 regardless of insertion order.
            for position in range(len(videos_dict)):
                videos_to_concatenate.append(
                    VideoFileClip(videos_dict[position])
                )
            final_video = concatenate_videoclips(videos_to_concatenate)
            final_video.write_videofile("final_video.mp4")
        finally:
            # Release the readers held by the source clips, even when
            # loading or writing fails part-way through.
            for video_clip in videos_to_concatenate:
                video_clip.close()
        return "final_video.mp4"