Video-Matting-Anything

Paused

App Files Files Community

fffiloni committed on Jun 10, 2023

Commit

bad91c5

1 Parent(s): 31757cd

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -2

app.py CHANGED Viewed

@@ -66,6 +66,52 @@ grounding_dino_model = Model(model_config_path=GROUNDING_DINO_CONFIG_PATH, model
 generator = StableDiffusionPipeline.from_pretrained("checkpoints/stable-diffusion-v1-5", torch_dtype=torch.float16)
 generator.to(device)
 def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
     #global groundingdino_model, sam_predictor, generator
@@ -228,6 +274,40 @@ def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, bac
     green_img = np.uint8(green_img)
     return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("MAM demo", add_help=True)
     parser.add_argument("--debug", action="store_true", help="using debug mode")
@@ -268,8 +348,9 @@ if __name__ == "__main__":
         with gr.Row():
             with gr.Column():
-                input_image = gr.Image(source='upload', type="numpy", value="assets/demo.jpg", tool="sketch")
-                task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
                 text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
                 background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
                 background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")

 generator = StableDiffusionPipeline.from_pretrained("checkpoints/stable-diffusion-v1-5", torch_dtype=torch.float16)
 generator.to(device)
+def get_frames(video_in):
+    """Resize a video to 512 px height and dump every frame to a JPEG file.
+
+    The input is re-encoded to ``video_resized.mp4`` with its frame rate
+    capped at 30 fps, then read back with OpenCV and written frame by frame
+    to ``kang<i>.jpg`` in the current working directory.
+
+    Args:
+        video_in: Path of the source video, as accepted by ``VideoFileClip``.
+
+    Returns:
+        A ``(frames, fps)`` tuple: the list of written JPEG paths in frame
+        order, and the frame rate OpenCV reports for the resized video.
+    """
+    clip = VideoFileClip(video_in)
+    try:
+        # Cap the output rate at 30 fps; slower sources keep their own rate.
+        if clip.fps > 30:
+            print("vide rate is over 30, resetting to 30")
+            target_fps = 30
+        else:
+            print("video rate is OK")
+            target_fps = clip.fps
+        clip_resized = clip.resize(height=512)
+        clip_resized.write_videofile("video_resized.mp4", fps=target_fps)
+    finally:
+        # Release the reader held by the clip even if re-encoding fails.
+        clip.close()
+    print("video resized to 512 height")
+    # Read the resized file back with OpenCV to split it into frames.
+    cap = cv2.VideoCapture("video_resized.mp4")
+    frames = []
+    try:
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        print("video fps: " + str(fps))
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
+                break
+            # Frames are numbered from 0 in read order.
+            frame_path = 'kang' + str(len(frames)) + '.jpg'
+            cv2.imwrite(frame_path, frame)
+            frames.append(frame_path)
+    finally:
+        # Always free the capture handle, including when a read/write raises.
+        cap.release()
+    cv2.destroyAllWindows()
+    print("broke the video into frames")
+    return frames, fps
+def create_video(frames, fps):
+    """Assemble frame images into ``movie.mp4`` and return that path.
+
+    Args:
+        frames: Frame sequence handed to ``ImageSequenceClip``.
+        fps: Frame rate used both to build the clip and to write the file.
+
+    Returns:
+        The string ``'movie.mp4'``, the file written in the working directory.
+    """
+    print("building video result")
+    output_path = 'movie.mp4'
+    sequence = ImageSequenceClip(frames, fps=fps)
+    sequence.write_videofile(output_path, fps=fps)
+    return output_path
 def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
     #global groundingdino_model, sam_predictor, generator
     green_img = np.uint8(green_img)
     return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
+def infer(prompt,video_in, trim_value):
+    """Run the matting pipeline on the leading frames of a video.
+
+    The video is split into JPEG frames with ``get_frames``, the first
+    ``trim_value * fps`` frames are passed one by one to
+    ``run_grounded_sam``, each result is saved as a JPEG, and the saved
+    frames are reassembled with ``create_video``.
+
+    Args:
+        prompt: Text prompt; printed and forwarded to ``run_grounded_sam``
+            for every frame.
+        video_in: Path of the input video.
+        trim_value: Multiplied by the video frame rate to get the number of
+            frames to process (i.e. a duration in seconds).
+
+    Returns:
+        The path returned by ``create_video``.
+    """
+    print(prompt)
+    frames_list, fps = get_frames(video_in)
+    n_frame = int(trim_value * fps)
+    if n_frame >= len(frames_list):
+        print("video is shorter than the cut value")
+        n_frame = len(frames_list)
+    result_frames = []
+    print("set stop frames to: " + str(n_frame))
+    for i in frames_list[0:n_frame]:
+        # Decode the frame and hand over an RGB array, not the numpy module.
+        with Image.open(i) as frame_file:
+            frame_rgb = np.array(frame_file.convert("RGB"))
+        # NOTE(review): every argument after the prompt resolves to a
+        # module-level name that is not defined in this function; confirm
+        # they hold plain values (not UI components) at call time.
+        outputs = run_grounded_sam(frame_rgb, prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode)
+        # run_grounded_sam returns a list of (image, caption) pairs; the
+        # third one is captioned 'alpha matte'.
+        # NOTE(review): assumes the matte is the wanted output; use index 0
+        # (composite) or 1 (green screen) if that was the intent.
+        matte_img = outputs[2][0]
+        if not isinstance(matte_img, Image.Image):
+            # Arrays have no .save(); wrap them in a PIL image first.
+            matte_img = Image.fromarray(np.uint8(matte_img))
+        # exporting the image
+        matte_img.save(f"result_img-{i}.jpg")
+        result_frames.append(f"result_img-{i}.jpg")
+        print("frame " + i + "/" + str(n_frame) + ": done;")
+    final_vid = create_video(result_frames, fps)
+    print("finished !")
+    return final_vid
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("MAM demo", add_help=True)
     parser.add_argument("--debug", action="store_true", help="using debug mode")
         with gr.Row():
             with gr.Column():
+                video_in = gr.Video(source='upload', type="filepath")
+                #task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
+                task_type = "text"
                 text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
                 background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
                 background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")