fffiloni committed on
Commit
369ca2b
1 Parent(s): 8c0dd1b

simplification to flow generation only

Browse files
Files changed (1) hide show
  1. app.py +2 -151
app.py CHANGED
@@ -17,9 +17,6 @@ using our implementation of the RAFT model. We will also see how to convert the
17
  predicted flows to RGB images for visualization.
18
  """
19
 
20
- from diffusers import DiffusionPipeline, ControlNetModel
21
- from diffusers import UniPCMultistepScheduler
22
-
23
  import cv2
24
  import numpy as np
25
  import os
@@ -42,53 +39,6 @@ from scipy.interpolate import LinearNDInterpolator
42
  from imageio import imread, imwrite
43
 
44
 
45
- # Constants
46
- low_threshold = 100
47
- high_threshold = 200
48
-
49
- # Models
50
- controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
51
- pipe = DiffusionPipeline.from_pretrained(
52
- "mikesmodels/Waltz_with_Bashir_Diffusion", controlnet=controlnet, custom_pipeline="stable_diffusion_controlnet_img2img", safety_checker=None, torch_dtype=torch.float16
53
- )
54
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
55
-
56
- # This command loads the individual model components on GPU on-demand. So, we don't
57
- # need to explicitly call pipe.to("cuda").
58
- pipe.enable_model_cpu_offload()
59
-
60
- pipe.enable_xformers_memory_efficient_attention()
61
-
62
- # Generator seed,
63
- generator = torch.Generator('cuda').manual_seed(int(123456))
64
-
65
- def get_canny_filter(image):
66
- if not isinstance(image, np.ndarray):
67
- image = np.array(image)
68
-
69
- image = cv2.Canny(image, low_threshold, high_threshold)
70
- image = image[:, :, None]
71
- image = np.concatenate([image, image, image], axis=2)
72
- canny_image = Image.fromarray(image)
73
- return canny_image
74
-
75
-
76
- def generate_images(prompt, canny_image, image):
77
-
78
- output = pipe(
79
- controlnet_conditioning_image=canny_image,
80
- image = image,
81
- prompt = prompt,
82
- generator=generator,
83
- num_images_per_prompt=1,
84
- num_inference_steps=20,
85
- )
86
- all_outputs = []
87
- all_outputs.append(canny_image)
88
- for image in output.images:
89
- all_outputs.append(image)
90
- return all_outputs
91
-
92
 
93
  def write_flo(flow, filename):
94
  """
@@ -125,20 +75,7 @@ def infer():
125
  #frames, _, _ = read_video(str("./spacex.mp4"), output_format="TCHW")
126
  #print(f"FRAME BEFORE stack: {frames[100]}")
127
 
128
- prompt = "wltzwthbshr basketball player"
129
-
130
- pil2diff_img = Image.open("./basket1.jpg")
131
- canny_image = get_canny_filter(pil2diff_img)
132
- diffused_img = generate_images(prompt, canny_image, pil2diff_img)
133
- print(f"DIFFUSED IMG: {diffused_img[1]}")
134
-
135
- diffused_img[1].save("diffused_input1.jpg")
136
 
137
- pil2diff_img2 = Image.open("./frame2.jpg")
138
- canny_image2 = get_canny_filter(pil2diff_img2)
139
-
140
- canny_image.save("canny1.jpg")
141
- canny_image2.save("canny2.jpg")
142
  input_frame_1 = read_image(str("./basket1.jpg"), ImageReadMode.UNCHANGED)
143
  print(f"FRAME 1: {input_frame_1}")
144
  input_frame_2 = read_image(str("./basket2.jpg"), ImageReadMode.UNCHANGED)
@@ -233,94 +170,8 @@ def infer():
233
  write_jpeg(flow_img, f"predicted_flow.jpg")
234
 
235
  flo_file = write_flo(predicted_flow, "flofile.flo")
236
-
237
- # define a transform to convert a tensor to PIL image
238
- transform = T.ToPILImage()
239
 
240
- # convert the tensor to PIL image using above transform
241
- #img = transform(frames[1])
242
- img = transform(input_frame_2)
243
- img = img.resize((960, 520))
244
- # display the PIL image
245
- #img.show()
246
- frame2pil = np.array(img.convert('RGB'))
247
- print(f"frame1pil: {frame2pil}")
248
- print(f"frame1pil shape: {frame2pil.shape}")
249
- print(f"frame1pil dtype: {frame2pil.dtype}")
250
- img.save('raw_frame2.jpg')
251
-
252
- # convert the tensor diffused to PIL image using above transform
253
- #img = transform(frames[1])
254
- img_diff = transform(input_diffused)
255
- img_diff = img_diff.resize((960, 520))
256
- # display the PIL image
257
- #img.show()
258
- diffpil = np.array(img_diff.convert('RGB'))
259
- print(f"frame1pil: {diffpil}")
260
- print(f"frame1pil shape: {diffpil.shape}")
261
- print(f"frame1pil dtype: {diffpil.dtype}")
262
- img_diff.save('diffused_resized.jpg')
263
-
264
-
265
- numpy_array_flow = predicted_flow.permute(1, 2, 0).detach().cpu().numpy()
266
- print(f"numpy_array_flow: {numpy_array_flow}")
267
- print(f"numpy_array_flow shape: {numpy_array_flow.shape}")
268
- print(f"numpy_array_flow dtype: {numpy_array_flow.dtype}")
269
-
270
- h, w = numpy_array_flow.shape[:2]
271
- numpy_array_flow = numpy_array_flow.copy()
272
- numpy_array_flow[:, :, 0] += np.arange(w)
273
- numpy_array_flow[:, :, 1] += np.arange(h)[:, np.newaxis]
274
- # print('flow stats', flow.max(), flow.min(), flow.mean())
275
- # print(flow)
276
- numpy_array_flow*=1.
277
- # print('flow stats mul', flow.max(), flow.min(), flow.mean())
278
- # res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
279
- res = cv2.remap(diffpil, numpy_array_flow, None, cv2.INTER_LANCZOS4)
280
- print(res)
281
-
282
- res = Image.fromarray(res)
283
- res.save('warped.jpg')
284
-
285
- blend2 = Image.open('raw_frame2.jpg')
286
- blend2 = Image.blend(res,blend2,0.5)
287
- blend2.save("blended2.jpg")
288
-
289
- pil2diff_blend = Image.open("warped.jpg")
290
- #pil2diff_blend = Image.open("./basket2.jpg")
291
- canny_image = get_canny_filter(pil2diff_blend)
292
- diffused_blend = generate_images(prompt, canny_image, pil2diff_blend)
293
- print(f"DIFFUSED IMG: {diffused_blend[1]}")
294
-
295
- diffused_blend[1].save("diffused_blended_2.jpg")
296
-
297
- return "done", "predicted_flow.jpg", ["flofile.flo"], "diffused_input1.jpg", "diffused_blended_2.jpg", 'warped.jpg', "blended2.jpg"
298
- ####################################
299
- # Bonus: Creating GIFs of predicted flows
300
- # ---------------------------------------
301
- # In the example above we have only shown the predicted flows of 2 pairs of
302
- # frames. A fun way to apply the Optical Flow models is to run the model on an
303
- # entire video, and create a new video from all the predicted flows. Below is a
304
- # snippet that can get you started with this. We comment out the code, because
305
- # this example is being rendered on a machine without a GPU, and it would take
306
- # too long to run it.
307
-
308
- # from torchvision.io import write_jpeg
309
- # for i, (img1, img2) in enumerate(zip(frames, frames[1:])):
310
- # # Note: it would be faster to predict batches of flows instead of individual flows
311
- # img1, img2 = preprocess(img1, img2)
312
-
313
- # list_of_flows = model(img1.to(device), img2.to(device))
314
- # predicted_flow = list_of_flows[-1][0]
315
- # flow_img = flow_to_image(predicted_flow).to("cpu")
316
- # output_folder = "/tmp/" # Update this to the folder of your choice
317
- # write_jpeg(flow_img, output_folder + f"predicted_flow_{i}.jpg")
318
-
319
- ####################################
320
- # Once the .jpg flow images are saved, you can convert them into a video or a
321
- # GIF using ffmpeg with e.g.:
322
- #
323
- # ffmpeg -f image2 -framerate 30 -i predicted_flow_%d.jpg -loop -1 flow.gif
324
 
325
 
326
- gr.Interface(fn=infer, inputs=[], outputs=[gr.Textbox(), gr.Image(label="flow"), gr.Files(), gr.Image(label="diffused input"), gr.Image(), gr.Image(label="warped flow to img2"), gr.Image(label="blended result to diffuse")]).launch()
 
17
  predicted flows to RGB images for visualization.
18
  """
19
 
 
 
 
20
  import cv2
21
  import numpy as np
22
  import os
 
39
  from imageio import imread, imwrite
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  def write_flo(flow, filename):
44
  """
 
75
  #frames, _, _ = read_video(str("./spacex.mp4"), output_format="TCHW")
76
  #print(f"FRAME BEFORE stack: {frames[100]}")
77
 
 
 
 
 
 
 
 
 
78
 
 
 
 
 
 
79
  input_frame_1 = read_image(str("./basket1.jpg"), ImageReadMode.UNCHANGED)
80
  print(f"FRAME 1: {input_frame_1}")
81
  input_frame_2 = read_image(str("./basket2.jpg"), ImageReadMode.UNCHANGED)
 
170
  write_jpeg(flow_img, f"predicted_flow.jpg")
171
 
172
  flo_file = write_flo(predicted_flow, "flofile.flo")
 
 
 
173
 
174
+ return "done", "predicted_flow.jpg", ["flofile.flo"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
 
177
+ gr.Interface(fn=infer, inputs=[], outputs=[gr.Textbox(), gr.Image(label="flow"), gr.Files()]).launch()