fffiloni committed
Commit bad91c5 · 1 Parent(s): 31757cd

Update app.py

Files changed (1):
  1. app.py +83 -2
app.py CHANGED
@@ -66,6 +66,52 @@ grounding_dino_model = Model(model_config_path=GROUNDING_DINO_CONFIG_PATH, model
 generator = StableDiffusionPipeline.from_pretrained("checkpoints/stable-diffusion-v1-5", torch_dtype=torch.float16)
 generator.to(device)
 
+def get_frames(video_in):
+    frames = []
+    # resize the video
+    clip = VideoFileClip(video_in)
+
+    # check fps and cap it at 30
+    if clip.fps > 30:
+        print("video rate is over 30, resetting to 30")
+        clip_resized = clip.resize(height=512)
+        clip_resized.write_videofile("video_resized.mp4", fps=30)
+    else:
+        print("video rate is OK")
+        clip_resized = clip.resize(height=512)
+        clip_resized.write_videofile("video_resized.mp4", fps=clip.fps)
+
+    print("video resized to 512 height")
+
+    # open the resized video with cv2
+    cap = cv2.VideoCapture("video_resized.mp4")
+
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    print("video fps: " + str(fps))
+    i = 0
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            break
+        cv2.imwrite('kang' + str(i) + '.jpg', frame)
+        frames.append('kang' + str(i) + '.jpg')
+        i += 1
+
+    cap.release()
+    cv2.destroyAllWindows()
+    print("broke the video into frames")
+
+    return frames, fps
+
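For orientation, a minimal sketch of how the helper above is meant to be called; it assumes `from moviepy.editor import VideoFileClip` and `import cv2` near the top of app.py, which this hunk does not show, and the `sample.mp4` path is a placeholder:

# Hypothetical usage of get_frames: split a clip into per-frame JPEGs
# written to the working directory as kang0.jpg, kang1.jpg, ...
frames, fps = get_frames("sample.mp4")
print(str(len(frames)) + " frames extracted at " + str(fps) + " fps")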
+def create_video(frames, fps):
+    print("building video result")
+    clip = ImageSequenceClip(frames, fps=fps)
+    clip.write_videofile("movie.mp4", fps=fps)
+
+    return 'movie.mp4'
+
+
 def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
 
     #global groundingdino_model, sam_predictor, generator
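`create_video` leans on moviepy's `ImageSequenceClip`, which builds a clip directly from a list of image paths, so no manual `cv2.VideoWriter` bookkeeping is needed. A minimal sketch with placeholder filenames:

from moviepy.editor import ImageSequenceClip

# Three placeholder frame paths; any list of same-sized images works.
clip = ImageSequenceClip(["kang0.jpg", "kang1.jpg", "kang2.jpg"], fps=30)
clip.write_videofile("movie.mp4", fps=30)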
 
@@ -228,6 +274,40 @@ def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, bac
     green_img = np.uint8(green_img)
     return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
 
+def infer(prompt, video_in, trim_value):
+    print(prompt)
+    break_vid = get_frames(video_in)
+
+    frames_list = break_vid[0]
+    fps = break_vid[1]
+    n_frame = int(trim_value * fps)
+
+    if n_frame >= len(frames_list):
+        print("video is shorter than the cut value")
+        n_frame = len(frames_list)
+
+    result_frames = []
+    print("set stop frames to: " + str(n_frame))
+
+    for i in frames_list[0:int(n_frame)]:
+        # run_grounded_sam expects a numpy image, so convert the PIL frame
+        numpy_i = np.array(Image.open(i).convert("RGB"))
+
+        # keep the first output, the composite with the generated background
+        matte_img = run_grounded_sam(numpy_i, prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode)[0][0]
+
+        # export the processed frame
+        Image.fromarray(matte_img).save(f"result_img-{i}.jpg")
+        result_frames.append(f"result_img-{i}.jpg")
+        print("frame " + i + "/" + str(n_frame) + ": done;")
+
+    final_vid = create_video(result_frames, fps)
+    print("finished!")
+
+    return final_vid
+
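`run_grounded_sam` returns a list of `(image, label)` tuples, so `infer` keeps only the first entry, the composite with the generated background. A sketch of that selection for a single frame; the threshold and mode arguments are illustrative placeholders, not the demo's defaults:

# frame is assumed to be a numpy RGB image, as in the loop above;
# the prompts reuse the UI placeholders from this demo.
outputs = run_grounded_sam(frame, "the girl in the middle", "text",
                           "downtown area in New York", "generated_by_text",
                           0.25, 0.25, 0.5, "merge", "alpha")
composite = outputs[0][0]  # (image, 'composite with background')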
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("MAM demo", add_help=True)
     parser.add_argument("--debug", action="store_true", help="using debug mode")
 
@@ -268,8 +348,9 @@ if __name__ == "__main__":
 
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(source='upload', type="numpy", value="assets/demo.jpg", tool="sketch")
-            task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
+            video_in = gr.Video(source='upload', type="filepath")
+            #task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
+            task_type = "text"
             text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
             background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
             background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
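The hunk above only swaps the input widgets; the event binding that feeds `infer` is not part of this diff. A hedged sketch of what it could look like with the Gradio 3.x API the demo already uses, where `trim_in`, `video_out`, and `submit_btn` are hypothetical names:

# Hypothetical wiring, not shown in this commit.
trim_in = gr.Slider(label="Trim video at (s)", minimum=1, maximum=10, step=1, value=1)
video_out = gr.Video(label="Result")
submit_btn = gr.Button("Submit")
submit_btn.click(fn=infer, inputs=[text_prompt, video_in, trim_in], outputs=video_out)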