# Podfusion / animate.py
# Page header captured from the web file viewer (not part of the program):
# aslasdlkj's picture | Update animate.py | daaeae3 | raw | history blame | 7.33 kB
import os
import sys
from cgi import test
from pathlib import Path
import cv2
import mediapy
import numpy as np
from frame_interpolation.eval import interpolator, util
from huggingface_hub import snapshot_download
from image_tools.sizes import resize_and_crop
from moviepy.editor import CompositeVideoClip
from moviepy.editor import VideoFileClip as vfc
from PIL import Image
def list_of_positions(num_contours, num_frames=100):
    """Return the contour indices at which a video frame should be generated.

    The result has ``num_frames`` entries; entry ``i`` is
    ``int(num_contours / num_frames * i)``, i.e. the indices are spread
    evenly over ``range(num_contours)`` and truncated towards zero.
    """
    return [int(num_contours / num_frames * step) for step in range(num_frames)]
def _frame_size(image):
    """Return ``(width, height)`` of *image*, the order ``cv2.VideoWriter`` expects."""
    height, width = np.shape(image)[:2]
    return (width, height)


def _write_contour_video(contours, canvas, writer, num_frames):
    """Draw *contours* cumulatively onto *canvas* and write key frames to *writer*.

    A frame is written whenever the index of the contour just drawn is one of
    the positions returned by ``list_of_positions``. The writer is released
    on exit, including when drawing raises.
    """
    # A set makes the per-contour membership test O(1).
    key_positions = set(list_of_positions(len(contours), num_frames))
    try:
        for index in range(len(contours)):
            cv2.drawContours(
                canvas,
                contours=contours,
                contourIdx=index,
                color=(125, 200, 255),
                thickness=1,
            )
            if index in key_positions:
                writer.write(canvas)
    finally:
        writer.release()


def _render_contour_clip(image, video_path, fourcc, num_frames):
    """Write the contour build-up animation of *image* to *video_path* and open it."""
    edges = cv2.Canny(image=image, threshold1=100, threshold2=200)
    contours, _hierarchy = cv2.findContours(
        edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    # Blank page with the same shape as the source image to draw contours on.
    canvas = np.zeros(np.shape(image), dtype="uint8")
    # The writer must be given (width, height); numpy shapes are (rows, cols).
    writer = cv2.VideoWriter(video_path, fourcc, 24, _frame_size(image))
    _write_contour_video(contours, canvas, writer, num_frames)
    return vfc(video_path)


def _write_text_video(text, canvas, writer, output_dir):
    """Type *text* onto *canvas* one character at a time, recording to *writer*.

    For each character the current canvas is written, the character is drawn,
    the canvas is saved as ``text_im<i>.png`` and written again. The initial
    canvas is saved as ``blink.png``. The writer is released on exit.
    """
    font = cv2.FONT_HERSHEY_COMPLEX
    font_scale = 3
    color = (186, 184, 108)
    thickness = 4
    spacing = 55  # horizontal advance between letters, in pixels
    org = (10, 400)  # position handed to cv2.putText for the first character

    cv2.imwrite((output_dir / "blink.png").as_posix(), canvas)
    try:
        # Iterate over every character; the previous ``len(text) - 1`` bound
        # never rendered the last one.
        for index, char in enumerate(text):
            writer.write(canvas)
            cv2.putText(
                canvas, char, org, font, font_scale, color, thickness, cv2.LINE_AA
            )

            # Advance the pen; upper-case letters get an additional advance.
            org = (org[0] + spacing, org[1])
            if char.isupper():
                org = (org[0] + spacing + 1, org[1])
                print(f"Upper {char}")
                print(org)

            cv2.imwrite((output_dir / f"text_im{index}.png").as_posix(), canvas)
            writer.write(canvas)
    finally:
        writer.release()


def contourfinder(image1, image2, text=None, num_frames=100, output_dir=Path("temp")):
    """Render contour build-up videos for two images and return them as clips.

    For each image, Canny edges are computed, their contours are drawn one by
    one onto a blank canvas, and selected intermediate canvases are written
    to ``vid1.mp4`` / ``vid2.mp4`` (24 fps, ``mp4v``) inside ``output_dir``.

    Args:
        image1: Image array for the first video (as accepted by ``cv2.Canny``).
        image2: Image array for the second video.
        text: Optional string. When given, a typing animation is additionally
            written to ``text_video.mp4`` (10 fps) on a canvas shaped like
            ``image1``; the file is only created in that case.
        num_frames: Number of key positions requested from
            ``list_of_positions`` for each video.
        output_dir: Directory that receives the generated files.

    Returns:
        A ``(clip1, clip3)`` tuple of ``VideoFileClip`` objects opened on
        ``vid1.mp4`` and ``vid2.mp4``.
    """
    output_dir = Path(output_dir)
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")

    clip1 = _render_contour_clip(
        image1, (output_dir / "vid1.mp4").as_posix(), fourcc, num_frames
    )
    # Video 2 is sized from image2 itself so its frames match its writer.
    clip3 = _render_contour_clip(
        image2, (output_dir / "vid2.mp4").as_posix(), fourcc, num_frames
    )

    if text is not None:
        # The original referenced an undefined name here; image1 is used
        # because the text writer was sized from image1 as well.
        text_canvas = np.zeros(np.shape(image1), dtype="uint8")
        text_writer = cv2.VideoWriter(
            (output_dir / "text_video.mp4").as_posix(),
            fourcc,
            10,
            _frame_size(image1),
        )
        _write_text_video(text, text_canvas, text_writer, output_dir)

    return clip1, clip3
def load_model(model_name):
    """Create an ``Interpolator`` for the repository ``model_name``.

    The value returned by ``snapshot_download(repo_id=model_name)`` is passed
    as the first argument to ``interpolator.Interpolator``; the second
    argument is ``None``.
    """
    snapshot = snapshot_download(repo_id=model_name)
    return interpolator.Interpolator(snapshot, None)
# Repository ids of the available interpolation models; ``get_video_frames``
# picks one by index into this list.
model_names = [
    "akhaliq/frame-interpolation-film-style",
    "NimaBoscarino/frame-interpolation_film_l1",
    "NimaBoscarino/frame_interpolation_film_vgg",
]

# Every model is loaded once, at import time, in list order, keyed by repo id.
models = dict(zip(model_names, map(load_model, model_names)))

# Tell mediapy which ffmpeg binary to use.
ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)
def resize(width, img):
    """Open ``img`` and scale it to ``width`` pixels wide, keeping aspect ratio.

    Args:
        width: Target width in pixels.
        img: Anything accepted by ``PIL.Image.open``.

    Returns:
        The resized ``PIL.Image.Image``. Its height is the source height
        scaled by ``width / source_width`` and truncated to an int.
    """
    image = Image.open(img)
    source_width, source_height = image.size
    scale = width / float(source_width)
    height = int(float(source_height) * float(scale))
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter and is available on older releases too.
    return image.resize((width, height), Image.LANCZOS)
def resize_img(img1, img2, output_dir):
    """Resize-and-crop ``img2`` to the size of ``img1`` and save the result.

    Args:
        img1: Image file whose ``(width, height)`` is the target size.
        img2: Image passed to ``resize_and_crop``.
        output_dir: Directory in which ``resized_img2.png`` is written.
    """
    # Only the size is needed; the context manager closes the file handle
    # instead of leaving it open.
    with Image.open(img1) as target:
        target_size = (target.size[0], target.size[1])

    resized = resize_and_crop(img2, target_size, crop_origin="middle")
    # Path(...) first, so a plain string directory works as well.
    resized.save(Path(output_dir) / "resized_img2.png")
def get_video_frames(
    images, vid_output_dir="temp", times_to_interpolate=6, model_name_index=0
):
    """Interpolate frames between the first two images of ``images``.

    Both images are resized to 256 px wide and saved as ``test1.png`` /
    ``test2.png`` in ``vid_output_dir``; the second is then resized and
    cropped to the first one's size (``resized_img2.png``). The pair is
    handed to ``util.interpolate_recursively_from_files``.

    Args:
        images: Sequence whose first two items are image file paths.
        vid_output_dir: Directory for the intermediate PNG files (str or Path).
        times_to_interpolate: Passed through to the interpolation routine.
        model_name_index: Index into ``model_names`` selecting the model.

    Returns:
        ``(frames, cv2_images)``: the list of interpolated frames, and the two
        original images as read by ``cv2.imread``.
    """
    # The default is a plain string, which does not support the "/" operator.
    vid_output_dir = Path(vid_output_dir)
    vid_output_dir.mkdir(parents=True, exist_ok=True)

    first_source, second_source = images[0], images[1]
    model = models[model_names[model_name_index]]
    cv2_images = [cv2.imread(first_source), cv2.imread(second_source)]

    test_1 = vid_output_dir / "test1.png"
    test_2 = vid_output_dir / "test2.png"
    resized_2 = vid_output_dir / "resized_img2.png"

    resize(256, first_source).save(test_1)
    resize(256, second_source).save(test_2)
    # Writes resized_img2.png: test2 resized and cropped to test1's size.
    resize_img(test_1, test_2, vid_output_dir)

    input_frames = [test_1.as_posix(), resized_2.as_posix()]
    frames = list(
        util.interpolate_recursively_from_files(
            input_frames, times_to_interpolate, model
        )
    )
    return frames, cv2_images
def create_mp4_with_audio(frames, cv2_images, duration, audio, output_path):
    """Compose the final video (contours -> interpolation -> contours) with audio.

    The interpolated ``frames`` are written to ``TEMP.mp4`` (10 fps) next to
    ``output_path``. ``contourfinder`` produces one contour clip per image.
    The three clips are layered so the interpolation clip starts 0.5 s before
    the first contour clip ends and the second contour clip starts when the
    interpolation clip ends; the later two clips cross-fade in over 2 s.

    Args:
        frames: Interpolated frames, as accepted by ``mediapy.write_video``.
        cv2_images: The two source images as arrays.
        duration: Requested duration; see the review note in the body.
        audio: Audio clip attached to the composite as-is.
        output_path: Destination file (str or Path).

    Returns:
        ``output_path`` as a POSIX-style string.
    """
    output_path = Path(output_path)
    vid_output_dir = output_path.parent
    temp_vid_path = vid_output_dir / "TEMP.mp4"
    mediapy.write_video(temp_vid_path, frames, fps=10)
    print(
        f"TYPES....{type(cv2_images[0])},{type(cv2_images[1])} SHAPES{cv2_images[0].shape} Img {cv2_images[0]}"
    )

    intro_clip, outro_source = contourfinder(
        cv2_images[0], cv2_images[1], output_dir=vid_output_dir
    )  # contourfinder also has an optional text argument, unused here
    morph_source = vfc(temp_vid_path.as_posix())

    try:
        # Sequence: contour video 1 -> interpolated video -> contour video 2.
        morph_start = intro_clip.duration - 0.5
        morph_clip = morph_source.resize(2).set_start(morph_start).crossfadein(2)
        outro_clip = outro_source.set_start(
            morph_start + morph_clip.duration
        ).crossfadein(2)

        new_clip = CompositeVideoClip([intro_clip, morph_clip, outro_clip])
        # The audio is attached without adjusting it to the video length.
        new_clip.audio = audio
        # NOTE(review): moviepy documents set_duration as returning a modified
        # copy, and the result is discarded here, so this call presumably has
        # no effect on the written file. It is left as-is because assigning
        # the result would change the output length; confirm what is intended.
        new_clip.set_duration(duration)
        new_clip.write_videofile(output_path.as_posix(), audio_codec="aac")
    finally:
        # Release the ffmpeg readers held by the file-backed clips. The
        # caller-owned audio clip is deliberately left open.
        for source in (intro_clip, morph_source, outro_source):
            source.close()

    return output_path.as_posix()