import os

# Fetch the FILM reference implementation at import time (Hugging Face Space convention)
os.system("git clone https://github.com/google-research/frame-interpolation")
import sys

sys.path.append("frame-interpolation")

import math

import cv2
import numpy as np
import tensorflow as tf
import mediapy
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
from image_tools.sizes import resize_and_crop
from pymatting import cutout

# Download the pretrained FILM (style) checkpoint
model = snapshot_download(repo_id="akhaliq/frame-interpolation-film-style")

from eval import interpolator, util

# Named so the Interpolator instance does not shadow the `interpolator` module
film_interpolator = interpolator.Interpolator(model, None)

ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)

# Path of the flash-difference mask written by the last even ("flash") frame,
# consumed by the following odd ("no flash") frame in remove_bg()
fl_ = ""


def do_interpolation(frame1, frame2, interpolation, n):
    print("tween frames: " + str(interpolation))
    print(frame1, frame2)
    input_frames = [frame1, frame2]
    frames = list(
        util.interpolate_recursively_from_files(
            input_frames, int(interpolation), film_interpolator))
    mediapy.write_video(f"{n}_to_{n+1}_out.mp4", frames, fps=25)
    return f"{n}_to_{n+1}_out.mp4"


def get_frames(video_in, step, name, n):
    frames = []
    cap = cv2.VideoCapture(video_in)
    cframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cfps = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"frames: {cframes}, fps: {cfps}")
    # Note: resizing of the input video (and the 25 fps cap) was removed;
    # frames are extracted at their native resolution.
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("video fps: " + str(fps))
    i = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imwrite(f"{n}_{name}_{step}{i}.png", frame)
        frames.append(f"{n}_{name}_{step}{i}.png")
        i += 1
    cap.release()
    cv2.destroyAllWindows()
    print("broke the video into frames")
    return frames, fps


def create_video(frames, fps, prefix):
    print("building video result")
    imgs = []
    for j, img in enumerate(frames):
        # OpenCV loads BGR; mediapy expects RGB
        imgs.append(cv2.cvtColor(cv2.imread(img).astype(np.uint8), cv2.COLOR_BGR2RGB))
    mediapy.write_video(prefix + "_result.mp4", imgs, fps=fps)
    return prefix + "_result.mp4"


def infer(f_in, interpolation, fps_output):
    # 1. collect the input frame paths (frame extraction happens in loadf)
    frames_list = f_in
    fps = 1
    print(f"ORIGIN FPS: {fps}")
    n_frame = 300  # limited to 300 frames
    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)
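    # With t interpolation steps, FILM's recursive splitting turns each pair of
    # input frames into 2**t + 1 frames, endpoints included, so consecutive
    # segments share a boundary frame. The assembly loop below therefore drops
    # the last frame of every segment and appends the final original frame once
    # at the end; e.g. t=2 on (A, B) yields A, i1, i2, i3, B and keeps A..i3.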
    # 2. interpolate between consecutive frames and collect the results
    result_frames = []
    print("set stop frames to: " + str(n_frame))
    for idx, frame in enumerate(frames_list[0:n_frame]):
        if idx < len(frames_list) - 1:
            next_frame = frames_list[idx + 1]
            # do_interpolation returns the path of a short mp4 holding the tween frames
            interpolated_video = do_interpolation(frame, next_frame, interpolation, idx)
            break_interpolated_video = get_frames(interpolated_video, "interpol", f"{idx}_", -1)
            print(break_interpolated_video[0])
            for j, img in enumerate(break_interpolated_video[0][:-1]):
                print(f"IMG:{img}")
                os.rename(img, f"{idx}_to_{idx+1}_{j}.png")
                result_frames.append(f"{idx}_to_{idx+1}_{j}.png")
            print(f"frames {idx} & {idx+1}/{n_frame}: done;")
    result_frames.append(f"{frames_list[n_frame-1]}")
    final_vid = create_video(result_frames, fps_output, "interpolated")
    files = final_vid
    print("interpolated frames: " + str(len(frames_list)) + " -> " + str(len(result_frames)))
    cv2.destroyAllWindows()
    return final_vid, files


def logscale(linear):
    return int(math.pow(2, linear))


def linscale(x):
    return int(math.log2(x))


def remove_bg(fl, count, mh, ms, md, lm, b, d):
    global fl_
    fr = cv2.imread(fl).astype(np.uint8)

    # Estimate the background by repeated heavy median blurring,
    # scaling the number of passes with the image area
    n = int((fr.shape[0] * fr.shape[1]) / (256 * 256))
    fr_bg = cv2.medianBlur(fr, 255)
    for i in range(0, n):
        fr_bg = cv2.medianBlur(fr_bg, 255)

    # Difference image: pixels far from the background estimate are foreground
    fr_diff = cv2.convertScaleAbs(fr.astype(np.int16) - fr_bg.astype(np.int16)).astype(np.uint8)
    hsv = cv2.cvtColor(fr_diff, cv2.COLOR_BGR2HSV)  # OpenCV HSV ranges: 180, 255, 255
    fr_diff = cv2.cvtColor(fr_diff, cv2.COLOR_BGR2GRAY)

    if lm == "median":
        mh = np.median(hsv[:, :, 0])
        ms = np.median(hsv[:, :, 1])
        md = np.median(hsv[:, :, 2])
    elif lm == "average":
        mh = np.average(hsv[:, :, 0])
        ms = np.average(hsv[:, :, 1])
        md = np.average(hsv[:, :, 2])

    # Threshold: anything within the H/S/V limits counts as background
    bg = cv2.inRange(hsv, np.array([0, 0, 0]), np.array([mh, ms, md]))
    fr_diff[bg > 0] = 0
    fr_diff[bg == 0] = 255

    # Flood-fill from the border to clear background regions touching the frame edge
    cv2.rectangle(fr_diff, (0, 0), (fr_diff.shape[1] - 1, fr_diff.shape[0] - 1), (255, 255, 255), 1)
    ffill_mask = np.zeros((fr_diff.shape[0] + 2, fr_diff.shape[1] + 2), np.uint8)
    cv2.floodFill(fr_diff, ffill_mask, (0, 0), 255, 0, 0, 4 | cv2.FLOODFILL_FIXED_RANGE)
    mask = ffill_mask[1:-1, 1:-1]
    fr_diff[mask > 0] = 0

    if count % 2:  # odd index: the photo taken without the flash
        # Combine with the flash frame's mask saved by the previous (even) call
        fr_mask = cv2.cvtColor(cv2.imread(fl_).astype(np.uint8), cv2.COLOR_BGR2GRAY)
        fr_not = np.bitwise_not(fr_mask)
        fr_shadow = np.bitwise_and(fr_diff, fr_not).astype(np.uint8)  # differs only without flash -> shadow
        fr_fg = np.bitwise_or(fr_diff, fr_mask).astype(np.uint8)
        cv2.imwrite(fl_, fr_mask)

        # Keep pure white/black free for the compositing step below
        m = cv2.inRange(fr, np.array([240, 240, 240]), np.array([255, 255, 255]))
        fr[m > 0] = (239, 239, 239)
        m = cv2.inRange(fr, np.array([0, 0, 0]), np.array([15, 15, 15]))
        fr[m > 0] = (16, 16, 16)
        fr[fr_shadow > 0] = (fr[fr_shadow > 0] / 17).astype(np.uint8)  # darken detected shadows

        # Refine the union mask with grabCut, seeding everything as "probable foreground"
        fr_fg[fr_fg > 0] = 3  # cv2.GC_PR_FGD
        bgdModel = np.zeros((1, 65), np.float64)
        fgdModel = np.zeros((1, 65), np.float64)
        mask, bgdModel, fgdModel = cv2.grabCut(fr, fr_fg, None, bgdModel, fgdModel, 65, cv2.GC_INIT_WITH_MASK)
        mask = np.where((mask == 2) | (mask == 0), 0, 1).astype("uint8")
        cv2.imwrite(fl, fr)

        element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
        mask_e = cv2.erode(mask, element) * 255
        element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
        mask_d = cv2.dilate(mask, element) * 127
        mask_d[mask_e > 0] = 255
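        # pymatting's cutout() reads this as a trimap: 0 = definite background,
        # 255 = definite foreground, and the 127 band left by the dilation is
        # the unknown region the alpha-matting solver resolves. The eroded
        # grabCut mask supplies the confident core; the dilated ring, the
        # uncertain border.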
cv2.imwrite(f"{str(count)}_trimask.png", mask_d.astype(np.uint8)) cutout(fl, f"{str(count)}_trimask.png", f"{str(count)}_cutout.png") a_map = cv2.imread(f"{str(count)}_cutout.png", cv2.IMREAD_UNCHANGED).astype(np.uint8) B, G, R, A = cv2.split(a_map) alpha = A / 255 alpha[A<255] = alpha[A<255] / 17 R = (255 * (1 - alpha) + R * alpha).astype(np.uint8) G = (255 * (1 - alpha) + G * alpha).astype(np.uint8) B = (255 * (1 - alpha) + B * alpha).astype(np.uint8) fr = cv2.merge((B, G, R)) cv2.imwrite(fl, fr) return fl else: # even: with the flash fl_ = fl.split(".")[0] + "_.png" cv2.imwrite(fl_, fr_diff.astype(np.uint8)) return fl_ def denoise(fl): fr = cv2.imread(fl).astype(np.uint8) fr = cv2.fastNlMeansDenoisingColored(fr, None, 5,5,7,21) cv2.imwrite(fl, fr) return fl def sharpest(fl, i): break_vid = get_frames(fl, "vid_input_frame", "origin", i) frames = [] blur_s = [] for jdx, fr in enumerate(break_vid[0]): frames.append(cv2.imread(fr).astype(np.uint8)) blur_s.append(cv2.Laplacian(cv2.cvtColor(frames[len(frames)-1], cv2.COLOR_BGR2GRAY), cv2.CV_64F).var()) print(str(int(blur_s[jdx]))) indx = np.argmax(blur_s) fl = break_vid[0][indx] n = 25 half = int(n/2) if indx-half < 0: n = indx*2+1 elif indx+half >= len(frames): n = (len(frames)-1-indx)*2+1 #denoise frame = cv2.fastNlMeansDenoisingColoredMulti( srcImgs = frames, imgToDenoiseIndex = indx, temporalWindowSize = n, hColor = 5, templateWindowSize = 7, searchWindowSize = 21) cv2.imwrite(fl, frame) print(str(i) +'th file, sharpest frame: '+str(indx)+', name: '+fl) return fl def sortFiles(e): e = e.split('/') return e[len(e)-1] def loadf(f, r_bg, mh, ms, md, lm, b, d): if f != None and f[0] != None: f.sort(key=sortFiles) fnew = [] for i, fl in enumerate(f): ftype = fl.split('/') if ftype[len(ftype)-1].split('.')[1] == 'mp4': fl = sharpest(fl, i) else: fl = denoise(fl) if r_bg == True: fl = remove_bg(fl, i, mh, ms, md, lm, b, d) if i % 2: # odd: is photo without the flash fnew.append(fl) else: fnew.append(fl) return fnew, fnew else: return f, f title="""

<h1>Video interpolation from images with FILM</h1>

<p>This Space uses FILM to generate interpolation frames between a set of uploaded images, so they can be turned into a video for stop-motion animation. If .mp4 videos are uploaded instead, the sharpest frame of each is selected. Input is limited to the first 300 uploaded frames.</p>

""" with gr.Blocks() as demo: with gr.Column(): gr.HTML(title) with gr.Row(): with gr.Column(): with gr.Accordion(label="Upload files here", open=True): files_orig = gr.File(file_count="multiple", file_types=['image', '.mp4']) files_input = gr.File(file_count="multiple", visible=False) gallery_input = gr.Gallery(label="Slideshow", preview=True, columns=8192, interactive=False) with gr.Group(): with gr.Accordion(label="Max differences for background", open=False): mh = gr.Slider(minimum=0, maximum=180, step=1, value=180, label="Hue") ms = gr.Slider(minimum=0, maximum=255, step=1, value=255, label="Saturation") md = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Lightness") lm = gr.Radio(label="Use max diffs from", choices=["average", "median", "slider"], value="slider") with gr.Tab("Border"): b_size = gr.Slider(minimum=1, maximum=255, step=2, value=3, label="Inner") d_size = gr.Slider(minimum=1, maximum=255, step=2, value=15, label="Outer") files_orig.upload(fn=loadf, inputs=[files_orig, False, mh, ms, md, lm, b_size, d_size], outputs=[files_input, gallery_input]) with gr.Row(): interpolation_slider = gr.Slider(minimum=1, maximum=24, step=1, value=1, label="Interpolation Steps: ") with gr.Row(): fps_output_slider = gr.Slider(minimum=0, maximum=24, step=1, value=24, label="FPS output: ") submit_btn = gr.Button("Submit") with gr.Column(): video_output = gr.Video() file_output = gr.File() submit_btn.click(fn=infer, inputs=[files_input, interpolation_slider, fps_output_slider], outputs=[video_output, file_output]) demo.launch()