File size: 9,427 Bytes
be6d4fe
 
bbe8153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4a56f6
bbe8153
e2f851a
 
bbe8153
 
 
cf309f8
 
 
4fceacd
b6488dd
bbe8153
 
 
 
212f3d8
3ec7744
 
 
 
8727e48
8084825
53c7f91
 
8084825
 
 
53c7f91
 
 
8084825
53c7f91
8f08c01
53c7f91
8084825
 
 
 
 
 
 
 
 
53c7f91
8084825
3ec7744
 
 
 
 
 
 
 
 
 
 
 
 
 
2d673d7
0603124
2d673d7
f14422e
3ec7744
 
 
 
 
 
41e00e5
3ec7744
 
 
 
 
 
 
 
 
 
 
 
 
cf1d320
3ec7744
f185bce
6640713
4a10134
 
fcb0ea5
7e3803b
 
 
 
bbe8153
cf309f8
f185bce
81b9dd6
 
f185bce
7e3803b
 
bbe8153
 
81b9dd6
 
 
 
bbe8153
 
81b9dd6
bbe8153
81b9dd6
bbe8153
 
 
 
 
 
 
 
 
 
 
cf309f8
bbe8153
 
7e3803b
bbe8153
7e3803b
 
bbe8153
81b9dd6
7e3803b
 
bbe8153
 
 
 
 
 
 
 
 
 
 
 
 
 
7e3803b
 
 
 
bbe8153
 
 
 
 
 
 
 
 
 
 
 
 
 
7e3803b
bbe8153
4fceacd
bbe8153
4fceacd
bbe8153
4fceacd
 
 
8ca72f6
136adc4
f14422e
56704ce
b6488dd
3ec7744
b6488dd
 
f14422e
b6488dd
 
f14422e
b6488dd
 
 
f14422e
 
 
b6488dd
bbe8153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e642140
 
23b843d
b6488dd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import gradio as gr

"""
=====================================================
Optical Flow: Predicting movement with the RAFT model
=====================================================

Optical flow is the task of predicting movement between two images, usually two
consecutive frames of a video. Optical flow models take two images as input, and
predict a flow: the flow indicates the displacement of every single pixel in the
first image, and maps it to its corresponding pixel in the second image. Flows
are (2, H, W)-dimensional tensors, where the first axis corresponds to the
predicted horizontal and vertical displacements.

The following example illustrates how torchvision can be used to predict flows
using our implementation of the RAFT model. We will also see how to convert the
predicted flows to RGB images for visualization.
"""

import cv2
import numpy as np
import os
import sys
import torch
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
from torchvision.io import read_video
from torchvision.models.optical_flow import Raft_Large_Weights
from torchvision.models.optical_flow import raft_large
from torchvision.io import write_jpeg
import torchvision.transforms as T

import tempfile
from pathlib import Path
from urllib.request import urlretrieve
import tensorflow as tf
from scipy.interpolate import interp2d
from imageio import imread, imwrite
from flowio import readFlowFile

def write_flo(flow, filename):
    """
    Write optical flow in Middlebury .flo format

    The file consists of a float32 magic number (202021.25), the width and
    the height as int32 values, followed by the flow values as float32 in
    row-major (height, width, 2) order.

    :param flow: optical flow map, either a torch tensor or anything numpy
        can convert to an array. The (H, W, 2) layout is written as-is; a
        channel-first (2, H, W) map is transposed to (H, W, 2) first so the
        header and the interleaving of the two components come out right.
    :param filename: optical flow file path to be saved
    :return: None

    from https://github.com/liruoteng/OpticalFlowToolkit/

    """
    # Tensors are detached from autograd and copied to host memory; any
    # other input is handed to numpy directly.
    if hasattr(flow, "detach"):
        flow = flow.detach().cpu().numpy()
    # forcing conversion to float32 precision
    flow = np.asarray(flow, dtype=np.float32)

    # Channel-first input: move the 2-element axis last. An array whose last
    # axis already has 2 entries is treated as (H, W, 2) and left untouched.
    if flow.ndim == 3 and flow.shape[0] == 2 and flow.shape[2] != 2:
        flow = flow.transpose(1, 2, 0)

    (height, width) = flow.shape[0:2]
    # The context manager closes the file even if one of the writes fails.
    with open(filename, 'wb') as f:
        np.array([202021.25], dtype=np.float32).tofile(f)
        np.array([width], dtype=np.int32).tofile(f)
        np.array([height], dtype=np.int32).tofile(f)
        # tofile always emits C order, regardless of the array's strides.
        flow.tofile(f)

def warpImage(im, vx, vy, cast_uint8=True):
    '''
    function to warp images with different dimensions

    Every output pixel (x, y) is filled with the value of ``im`` sampled at
    (x + vx, y + vy) using cubic spline interpolation. The output has the
    size of the flow field, which may differ from the size of ``im``.

    :param im: image to sample from, array of shape (height, width, channels)
    :param vx: horizontal displacement per output pixel, 2-D array
    :param vy: vertical displacement per output pixel, same shape as vx
    :param cast_uint8: when true, round the result, clip it to [0, 255] and
        return it as uint8; otherwise return float64
    :return: tuple (warped, mask). warped has shape
        (vx.shape[0], vx.shape[1], channels). mask is an integer array with
        the shape of vx holding 1 where the sampling position fell inside
        ``im`` and 0 where it fell outside (those pixels are filled with 0.6
        before the optional uint8 cast).
    '''
    # Imported locally so the function does not rely on interp2d, which
    # was removed from SciPy in 1.14.
    from scipy.ndimage import map_coordinates

    im = np.asarray(im)
    vx = np.asarray(vx, dtype=np.float64)
    vy = np.asarray(vy, dtype=np.float64)

    height2, width2, nChannels = im.shape
    height1, width1 = vx.shape

    # 1-based coordinates of the output grid, displaced by the flow.
    X = np.arange(1, width1 + 1, dtype=np.float64)
    Y = np.arange(1, height1 + 1, dtype=np.float64)
    XX, YY = np.meshgrid(X, Y)
    XX = XX + vx
    YY = YY + vy

    # Positions that leave the source image are remembered, then clamped so
    # the interpolation below never has to extrapolate.
    mask = (XX < 1) | (XX > width2) | (YY < 1) | (YY > height2)
    XX = np.clip(XX, 1, width2)
    YY = np.clip(YY, 1, height2)

    # map_coordinates expects 0-based (row, column) coordinates.
    sample_at = np.stack([YY - 1, XX - 1])

    warpI2 = np.zeros((height1, width1, nChannels))
    for i in range(nChannels):
        # Interpolate in float64: with an integer input map_coordinates
        # would otherwise produce an integer output as well.
        channel = im[:, :, i].astype(np.float64)
        foo = map_coordinates(channel, sample_at, order=3, mode='mirror')
        foo[mask] = 0.6
        warpI2[:, :, i] = foo

    mask = 1 - mask.astype(int)

    if cast_uint8:
        # Round and clip first: a bare astype truncates (99.999 -> 99) and
        # wraps around on cubic overshoot outside [0, 255].
        warpI2 = np.clip(np.rint(warpI2), 0, 255).astype(np.uint8)

    return warpI2, mask


def get_warp_res(fname_image, fname_flow, fname_output='warped.png'):
    """
    Warp an image file with a flow field and save the warped image.

    :param fname_image: path of the image to warp
    :param fname_flow: despite the name, the flow itself rather than a file
        path: a torch tensor in (H, W, 2) layout or in channel-first
        (2, H, W) layout, with the horizontal component first
    :param fname_output: path the warped image is written to
    :return: None
    """
    print(f"FNAME IMAGE: {fname_image}")
    im2 = imread(fname_image)
    print(f"FNAME IMAGE READED: {im2}")
    flow = fname_flow.cpu().detach().numpy()
    # A channel-first flow is moved to (H, W, 2) so the two slices below
    # really are the per-pixel horizontal and vertical displacements.
    if flow.ndim == 3 and flow.shape[0] == 2 and flow.shape[2] != 2:
        flow = flow.transpose(1, 2, 0)
    im_warped, _ = warpImage(im2, flow[:, :, 0], flow[:, :, 1])
    imwrite(fname_output, im_warped)
    
def infer():
    """
    Predict the optical flow between two frames of a sample video with RAFT.

    Downloads the basketball sample video, runs the pretrained RAFT-large
    model on frames 100 and 101 and writes three files to the current
    working directory: ``predicted_flow.jpg`` (the flow rendered as an RGB
    image), ``frame_input.jpg`` (the first frame of the pair, resized to the
    resolution the flow was computed at) and ``flofile.flo`` (the raw flow
    in Middlebury format).

    :return: tuple of a status string, the flow image path, a list holding
        the .flo file path, and the input frame path
    """
    from torchvision.utils import flow_to_image

    video_url = "https://download.pytorch.org/tutorial/pexelscom_pavel_danilyuk_basketball_hd.mp4"
    video_path = Path(tempfile.mkdtemp()) / "basketball.mp4"
    _ = urlretrieve(video_url, video_path)

    frames, _, _ = read_video(str(video_path), output_format="TCHW")
    print(f"FRAME BEFORE: {frames[100]}")
    img1_batch = torch.stack([frames[100]])
    img2_batch = torch.stack([frames[101]])
    print(f"FRAME AFTER: {img1_batch}")
    weights = Raft_Large_Weights.DEFAULT
    transforms = weights.transforms()

    def preprocess(img1_batch, img2_batch):
        # Resize both batches to 520x960, then apply the transforms that
        # ship with the pretrained weights.
        img1_batch = F.resize(img1_batch, size=[520, 960])
        img2_batch = F.resize(img2_batch, size=[520, 960])
        return transforms(img1_batch, img2_batch)

    img1_batch, img2_batch = preprocess(img1_batch, img2_batch)

    print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")

    ####################################
    # Estimating Optical flow using RAFT
    # ----------------------------------
    # We will use our RAFT implementation from
    # :func:`~torchvision.models.optical_flow.raft_large`, which follows the same
    # architecture as the one described in the `original paper <https://arxiv.org/abs/2003.12039>`_.
    # We also provide the :func:`~torchvision.models.optical_flow.raft_small` model
    # builder, which is smaller and faster to run, sacrificing a bit of accuracy.

    # If you can, run this example on a GPU, it will be a lot faster.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model = raft_large(weights=weights, progress=False).to(device)
    model = model.eval()

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
    print(f"type = {type(list_of_flows)}")
    print(f"length = {len(list_of_flows)} = number of iterations of the model")

    ####################################
    # The RAFT model outputs lists of predicted flows where each entry is a
    # (N, 2, H, W) batch of predicted flows that corresponds to a given "iteration"
    # in the model. For more details on the iterative nature of the model, please
    # refer to the `original paper <https://arxiv.org/abs/2003.12039>`_. Here, we
    # are only interested in the final predicted flows (they are the most accurate
    # ones), so we will just retrieve the last item in the list.
    #
    # As described above, a flow is a tensor with dimensions (2, H, W) (or (N, 2, H,
    # W) for batches of flows) where each entry corresponds to the horizontal and
    # vertical displacement of each pixel from the first image to the second image.
    # Note that the predicted flows are in "pixel" unit, they are not normalized
    # w.r.t. the dimensions of the images.
    predicted_flows = list_of_flows[-1]
    print(f"dtype = {predicted_flows.dtype}")
    print(f"shape = {predicted_flows.shape} = (N, 2, H, W)")
    print(f"min = {predicted_flows.min()}, max = {predicted_flows.max()}")

    ####################################
    # Visualizing predicted flows
    # ---------------------------
    # Torchvision provides the :func:`~torchvision.utils.flow_to_image` utility to
    # convert a flow into an RGB image. It also supports batches of flows.
    # Each "direction" in the flow will be mapped to a given RGB color, so
    # pixels with similar colors are assumed by the model to be moving in
    # similar directions.
    predicted_flow = predicted_flows[0]
    flow_img = flow_to_image(predicted_flow).to("cpu")
    write_jpeg(flow_img, "predicted_flow.jpg")

    # The input frame is already a uint8 (3, H, W) image, so it is written
    # directly; flow_to_image only accepts 2-channel float flows and raises on
    # a video frame. It is resized so it matches the resolution of the flow.
    input_image = F.resize(frames[100], size=[520, 960]).to("cpu")
    write_jpeg(input_image, "frame_input.jpg")

    # The .flo payload is (H, W, 2) interleaved, while RAFT predicts (2, H, W).
    write_flo(predicted_flow.permute(1, 2, 0).contiguous(), "flofile.flo")

    # NOTE: warping frame_input.jpg with the predicted flow (get_warp_res) is
    # not wired into the outputs yet.
    return "done", "predicted_flow.jpg", ["flofile.flo"], 'frame_input.jpg'
####################################
# Bonus: Creating GIFs of predicted flows
# ---------------------------------------
# In the example above we have only predicted the flow for a single pair of
# frames. A fun way to apply the Optical Flow models is to run the model on an
# entire video, and create a new video from all the predicted flows. Below is a
# snippet that can get you started with this. We comment out the code, because
# this example is being rendered on a machine without a GPU, and it would take
# too long to run it.

# from torchvision.io import write_jpeg
# for i, (img1, img2) in enumerate(zip(frames, frames[1:])):
#     # Note: it would be faster to predict batches of flows instead of individual flows
#     img1, img2 = preprocess(img1, img2)

#     list_of_flows = model(img1.to(device), img2.to(device))
#     predicted_flow = list_of_flows[-1][0]
#     flow_img = flow_to_image(predicted_flow).to("cpu")
#     output_folder = "/tmp/"  # Update this to the folder of your choice
#     write_jpeg(flow_img, output_folder + f"predicted_flow_{i}.jpg")

####################################
# Once the .jpg flow images are saved, you can convert them into a video or a
# GIF using ffmpeg with e.g.:
#
# ffmpeg -f image2 -framerate 30 -i predicted_flow_%d.jpg -loop -1 flow.gif

      
# Build the demo UI around infer(): it takes no inputs and its four return
# values are shown as a text box, an image, a file list and a second image.
demo = gr.Interface(
    fn=infer,
    inputs=[],
    outputs=[gr.Textbox(), gr.Image(), gr.Files(), gr.Image()],
)
demo.launch()