File size: 8,265 Bytes
be6d4fe bbe8153 d4a56f6 bbe8153 e2f851a bbe8153 4c8c2c3 bbe8153 2974c8a cf309f8 4fceacd b6488dd bbe8153 a07cb96 fd0a544 3ec7744 a07cb96 65f0b97 3ec7744 65f0b97 6a7976f 8727e48 8084825 53c7f91 8084825 53c7f91 8084825 53c7f91 8f08c01 53c7f91 8084825 53c7f91 8084825 3ec7744 fcb0ea5 2eb2041 dd47853 ffd7140 6b37ee1 54a7442 dd7adab 2eb2041 5807124 2eb2041 5807124 f252d28 5807124 0e27115 ffd7140 7e3803b bbe8153 81b9dd6 bbe8153 81b9dd6 bbe8153 81b9dd6 bbe8153 cf309f8 bbe8153 65f0b97 bbe8153 c798d8a 6d8f6a3 bbe8153 7e3803b 6d8f6a3 bbe8153 7e3803b bbe8153 4fceacd bbe8153 4fceacd bbe8153 4fceacd 089d664 ffd7140 4fceacd 8ca72f6 29224cf 56704ce b6488dd 591578d e642140 64daad5 d72081f 64daad5 d275034 33f7b35 d275034 591578d d275034 952baeb d275034 3d18183 d275034 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
import gradio as gr
"""
=====================================================
Optical Flow: Predicting movement with the RAFT model
=====================================================
Optical flow is the task of predicting movement between two images, usually two
consecutive frames of a video. Optical flow models take two images as input, and
predict a flow: the flow indicates the displacement of every single pixel in the
first image, and maps it to its corresponding pixel in the second image. Flows
are (2, H, W)-dimensional tensors, where the first axis corresponds to the
predicted horizontal and vertical displacements.
The following example illustrates how torchvision can be used to predict flows
using our implementation of the RAFT model. We will also see how to convert the
predicted flows to RGB images for visualization.
"""
import cv2
import numpy as np
import os
import sys
import torch
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
from torchvision.io import read_video, read_image, ImageReadMode
from torchvision.models.optical_flow import Raft_Large_Weights
from torchvision.models.optical_flow import raft_large
from torchvision.io import write_jpeg
import torchvision.transforms as T
import tempfile
from pathlib import Path
from urllib.request import urlretrieve
from scipy.interpolate import LinearNDInterpolator
from imageio import imread, imwrite
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build RAFT-large with its default pretrained weights once, at import time,
# move it to the selected device and put it in evaluation mode. `infer`
# reuses this single module-level instance for every request.
model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False)
model = model.to(device).eval()
def write_flo(flow, filename):
    """
    Write optical flow in Middlebury .flo format.

    The file layout is: a float32 magic number, the int32 width, the int32
    height, then the float32 flow values with the two displacement components
    of each pixel stored next to each other, row by row.

    :param flow: optical flow map, a torch tensor or anything ``np.asarray``
        accepts. Channels-last ``(H, W, 2)`` input is written as is;
        channels-first ``(2, H, W)`` input (the layout ``infer`` gets from the
        model) is transposed to ``(H, W, 2)`` first.
    :param filename: optical flow file path to be saved
    :return: None
    :raises ValueError: if ``flow`` is not 3-D or has no 2-element channel
        axis in the first or last position
    adapted from https://github.com/liruoteng/OpticalFlowToolkit/
    """
    # Torch tensors must be detached from autograd and copied to host memory
    # before NumPy can see them; plain arrays skip this step.
    if hasattr(flow, "detach"):
        flow = flow.detach().cpu().numpy()
    # forcing conversion to float32 precision
    flow = np.asarray(flow, dtype=np.float32)

    if flow.ndim != 3:
        raise ValueError(
            f"flow must be 3-dimensional, got shape {flow.shape}"
        )
    if flow.shape[2] != 2:
        if flow.shape[0] != 2:
            raise ValueError(
                "flow must have shape (H, W, 2) or (2, H, W), "
                f"got {flow.shape}"
            )
        # Channels-first input: reading height/width from shape[0:2] would
        # yield (2, H) and the payload would not be pixel-interleaved.
        flow = np.ascontiguousarray(flow.transpose(1, 2, 0))

    height, width = flow.shape[:2]
    magic = np.array([202021.25], dtype=np.float32)
    w = np.array([width], dtype=np.int32)
    h = np.array([height], dtype=np.int32)

    # The context manager closes the file even if one of the writes fails.
    with open(filename, "wb") as f:
        magic.tofile(f)
        w.tofile(f)
        h.tofile(f)
        flow.tofile(f)
def infer(frameA, frameB):
    """
    Estimate the optical flow between two image files with RAFT.

    Both frames are decoded from disk, resized to 520x960, preprocessed with
    the transforms bundled with the RAFT weights and passed through the
    module-level ``model``. The final flow estimate is saved twice in the
    current working directory: as an RGB visualization and as a Middlebury
    ``.flo`` file.

    :param frameA: path of the first frame (converted with ``str()``)
    :param frameB: path of the second frame (converted with ``str()``)
    :return: the tuple ``("predicted_flow.jpg", "flofile.flo")``
    """
    # Imported here, as in the original, so the file-level imports stay as is.
    from torchvision.utils import flow_to_image

    # Decode as 3-channel RGB: with UNCHANGED, a grayscale or RGBA upload
    # would arrive with 1 or 4 channels, which RAFT cannot consume.
    input_frame_1 = read_image(str(frameA), ImageReadMode.RGB)
    print(f"FRAME 1: {input_frame_1}")
    input_frame_2 = read_image(str(frameB), ImageReadMode.RGB)
    print(f"FRAME 2: {input_frame_2}")

    # The model works on batches, so each frame becomes a batch of one.
    img1_batch = torch.stack([input_frame_1])
    img2_batch = torch.stack([input_frame_2])
    print(f"FRAME AFTER stack: {img1_batch}")

    # Bring both frames to one fixed 520x960 working size (both dimensions
    # are multiples of 8), then apply the preprocessing shipped with the
    # weights. The predicted flow is therefore expressed in pixels of the
    # resized frames, not of the uploaded images.
    transforms = Raft_Large_Weights.DEFAULT.transforms()
    img1_batch = F.resize(img1_batch, size=[520, 960])
    img2_batch = F.resize(img2_batch, size=[520, 960])
    img1_batch, img2_batch = transforms(img1_batch, img2_batch)
    print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")

    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
    print(f"list_of_flows type = {type(list_of_flows)}")
    print(f"list_of_flows length = {len(list_of_flows)} = number of iterations of the model")

    # RAFT returns one (N, 2, H, W) batch of flows per refinement iteration;
    # the last entry is the final estimate, in pixel units.
    predicted_flows = list_of_flows[-1]
    print(f"predicted_flows dtype = {predicted_flows.dtype}")
    print(f"predicted_flows shape = {predicted_flows.shape} = (N, 2, H, W)")
    print(f"predicted_flows min = {predicted_flows.min()}, predicted_flows max = {predicted_flows.max()}")

    # Only one image pair was submitted, so take the single flow in the batch.
    predicted_flow = predicted_flows[0]
    print(f"predicted flow dtype = {predicted_flow.dtype}")
    print(f"predicted flow shape = {predicted_flow.shape}")

    # flow_to_image maps every flow direction to an RGB colour.
    flow_img = flow_to_image(predicted_flow).to("cpu")
    write_jpeg(flow_img, "predicted_flow.jpg")

    # write_flo reads (height, width) from the first two axes, so hand it the
    # flow as (H, W, 2) rather than the model's (2, H, W).
    write_flo(predicted_flow.permute(1, 2, 0), "flofile.flo")

    # NOTE(review): the output names are fixed, so simultaneous requests
    # overwrite each other's files — confirm whether that matters here.
    return "predicted_flow.jpg", "flofile.flo"
# HTML banner shown at the top of the page; rendered through gr.HTML(title).
title="""
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
margin-bottom: 10px;
"
>
<h1 style="font-weight: 600; margin-bottom: 7px;">
RAFT Optical Flow
</h1>
</div>
</div>
"""
# One-line HTML summary shown under the banner; rendered through
# gr.HTML(description).
description="<p style='text-align:center'>PyTorch way to Generate optical flow image & .flo file from 2 consecutive frames with RAFT model</p>"
# Stylesheet handed to gr.Blocks(css=css). The #col-container rule targets the
# column created with elem_id="col-container" and caps its width at 700px.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
"""
# Assemble the demo page: two image uploads, a submit button, and the two
# outputs produced by `infer` (flow visualization and .flo file).
with gr.Blocks(css=css) as block:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        gr.HTML(description)

        # type="filepath" makes Gradio hand `infer` file paths, which is what
        # it passes on to read_image.
        frame1_inp = gr.Image(source="upload", type="filepath", label="frame 1")
        frame2_inp = gr.Image(source="upload", type="filepath", label="frame 2")

        submit_btn = gr.Button("Submit")

        flow_img_out = gr.Image(label="flow image")
        flow_file_out = gr.File(label="flow file")

        # Example frame pairs, referenced by relative path.
        # NOTE(review): these files are not visible from this file — confirm
        # they are shipped next to the app.
        examples = [
            ["basket1.jpg", "basket2.jpg"],
            ["frame1.jpg", "frame2.jpg"],
        ]
        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[frame1_inp, frame2_inp],
            outputs=[flow_img_out, flow_file_out],
            cache_examples=True,
            run_on_click=True,
        )

    submit_btn.click(
        fn=infer,
        inputs=[frame1_inp, frame2_inp],
        outputs=[flow_img_out, flow_file_out],
    )

block.launch()