import gradio as gr

"""
=====================================================
Optical Flow: Predicting movement with the RAFT model
=====================================================

Optical flow is the task of predicting movement between two images, usually two
consecutive frames of a video. Optical flow models take two images as input, and
predict a flow: the flow indicates the displacement of every single pixel in the
first image, and maps it to its corresponding pixel in the second image. Flows
are (2, H, W)-dimensional tensors, where the first axis corresponds to the
predicted horizontal and vertical displacements.

The following example illustrates how torchvision can be used to predict flows
using our implementation of the RAFT model. We will also see how to convert the
predicted flows to RGB images for visualization.
"""

import cv2
import numpy as np
import os
import sys
import torch
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
from torchvision.io import read_video, read_image, ImageReadMode
from torchvision.models.optical_flow import Raft_Large_Weights
from torchvision.models.optical_flow import raft_large
from torchvision.io import write_jpeg
import torchvision.transforms as T

import tempfile
from pathlib import Path
from urllib.request import urlretrieve

from scipy.interpolate import LinearNDInterpolator
from imageio import imread, imwrite

device = "cuda" if torch.cuda.is_available() else "cpu"

model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
model = model.eval()
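
# As the module docstring notes, a flow is a (2, H, W) tensor whose first axis
# holds the per-pixel horizontal and vertical displacements. The helper below is
# an illustrative sketch (not called by the app) of how a pixel of the first
# frame is mapped into the second frame; the channel order (0 = horizontal,
# 1 = vertical) is assumed from the docstring above.
def flow_target_pixel(flow, y, x):
    """Return the (y, x) location in frame 2 that pixel (y, x) of frame 1 maps to."""
    dx = float(flow[0, y, x])  # horizontal displacement in pixels
    dy = float(flow[1, y, x])  # vertical displacement in pixels
    return y + dy, x + dx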

def write_flo(flow, filename):
    """
    Write optical flow in Middlebury .flo format
    
    :param flow: optical flow map
    :param filename: optical flow file path to be saved
    :return: None
    
    from https://github.com/liruoteng/OpticalFlowToolkit/
    
    """
    flow = flow.detach().cpu().numpy()
    if flow.ndim == 3 and flow.shape[0] == 2:
        # RAFT predicts flows as (2, H, W); the .flo format stores (H, W, 2),
        # so move the channel axis last before writing.
        flow = np.transpose(flow, (1, 2, 0))
    # forcing conversion to float32 precision (and a contiguous buffer for tofile)
    flow = np.ascontiguousarray(flow, dtype=np.float32)
    f = open(filename, 'wb')
    magic = np.array([202021.25], dtype=np.float32)
    (height, width) = flow.shape[0:2]
    w = np.array([width], dtype=np.int32)
    h = np.array([height], dtype=np.int32)
    magic.tofile(f)
    w.tofile(f)
    h.tofile(f)
    flow.tofile(f)
    f.close()


    
def infer(frameA, frameB):
    #video_url = "https://download.pytorch.org/tutorial/pexelscom_pavel_danilyuk_basketball_hd.mp4"
    #video_path = Path(tempfile.mkdtemp()) / "basketball.mp4"
    #_ = urlretrieve(video_url, video_path)
   
    #frames, _, _ = read_video(str("./spacex.mp4"), output_format="TCHW")
    #print(f"FRAME BEFORE stack: {frames[100]}")
    
    
    input_frame_1 = read_image(str(frameA), ImageReadMode.UNCHANGED)
    print(f"FRAME 1: {input_frame_1}")
    input_frame_2 = read_image(str(frameB), ImageReadMode.UNCHANGED)
    print(f"FRAME 1: {input_frame_2}")
    
    #img1_batch = torch.stack([frames[0]])
    #img2_batch = torch.stack([frames[1]])

    img1_batch = torch.stack([input_frame_1])
    img2_batch = torch.stack([input_frame_2])
    
    print(f"FRAME AFTER stack: {img1_batch}")
    
    weights = Raft_Large_Weights.DEFAULT
    transforms = weights.transforms()


    def preprocess(img1_batch, img2_batch):
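        # Resize to a fixed size whose height and width are divisible by 8
        # (RAFT works at 1/8 resolution internally), then apply the weights'
        # preset transforms, which convert the images to the normalized float
        # range the model expects.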
        img1_batch = F.resize(img1_batch, size=[520, 960])
        img2_batch = F.resize(img2_batch, size=[520, 960])
        return transforms(img1_batch, img2_batch)


    img1_batch, img2_batch = preprocess(img1_batch, img2_batch)

    print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")


####################################
# Estimating Optical flow using RAFT
# ----------------------------------
# We will use our RAFT implementation from
# :func:`~torchvision.models.optical_flow.raft_large`, which follows the same
# architecture as the one described in the `original paper <https://arxiv.org/abs/2003.12039>`_.
# We also provide the :func:`~torchvision.models.optical_flow.raft_small` model
# builder, which is smaller and faster to run, sacrificing a bit of accuracy.

    

# If you can, run this example on a GPU, it will be a lot faster.
    

    list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
    print(f"list_of_flows type = {type(list_of_flows)}")
    print(f"list_of_flows length = {len(list_of_flows)} = number of iterations of the model")

####################################
# The RAFT model outputs lists of predicted flows where each entry is a
# (N, 2, H, W) batch of predicted flows that corresponds to a given "iteration"
# in the model. For more details on the iterative nature of the model, please
# refer to the `original paper <https://arxiv.org/abs/2003.12039>`_. Here, we
# are only interested in the final predicted flows (they are the most accurate
# ones), so we will just retrieve the last item in the list.
#
# As described above, a flow is a tensor with dimensions (2, H, W) (or (N, 2, H,
# W) for batches of flows) where each entry corresponds to the horizontal and
# vertical displacement of each pixel from the first image to the second image.
# Note that the predicted flows are in "pixel" unit, they are not normalized
# w.r.t. the dimensions of the images.
    predicted_flows = list_of_flows[-1]
    print(f"predicted_flows dtype = {predicted_flows.dtype}")
    print(f"predicted_flows shape = {predicted_flows.shape} = (N, 2, H, W)")
    print(f"predicted_flows min = {predicted_flows.min()}, predicted_flows max = {predicted_flows.max()}")


####################################
# Visualizing predicted flows
# ---------------------------
# Torchvision provides the :func:`~torchvision.utils.flow_to_image` utility to
# convert a flow into an RGB image. It also supports batches of flows. Each
# "direction" in the flow will be mapped to a given RGB color. In the
# images below, pixels with similar colors are assumed by the model to be moving
# in similar directions. The model is properly able to predict the movement of
# the ball and the player. Note in particular the different predicted direction
# of the ball in the first image (going to the left) and in the second image
# (going up).

    from torchvision.utils import flow_to_image

    #flow_imgs = flow_to_image(predicted_flows)

    #print(flow_imgs)

    predicted_flow = list_of_flows[-1][0]
    print(f"predicted flow dtype = {predicted_flow.dtype}")
    print(f"predicted flow shape = {predicted_flow.shape}")
    
    flow_img = flow_to_image(predicted_flow).to("cpu")
    write_jpeg(flow_img, "predicted_flow.jpg")
    
    write_flo(predicted_flow, "flofile.flo")
    
    return "predicted_flow.jpg", "flofile.flo"

title="""
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
            margin-bottom: 10px;
        "
        >
        <h1 style="font-weight: 600; margin-bottom: 7px;">
            RAFT Optical Flow
        </h1>
        </div>
       
    </div>
"""
description="<p style='text-align:center'>PyTorch way to Generate optical flow image & .flo file from 2 consecutive frames with RAFT model</p>" 
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
"""
with gr.Blocks(css=css) as block:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        gr.HTML(description)

        frame1_inp = gr.Image(source="upload", type="filepath", label="frame 1")
        frame2_inp = gr.Image(source="upload", type="filepath", label="frame 2")
        
        submit_btn = gr.Button("Submit")
        
        flow_img_out = gr.Image(label="flow image") 
        flow_file_out = gr.File(label="flow file")
        
        
        examples=[
            ['basket1.jpg','basket2.jpg'],
            ['frame1.jpg', 'frame2.jpg']
        ]
        ex = gr.Examples(examples=examples, fn=infer, inputs=[frame1_inp, frame2_inp], outputs=[flow_img_out, flow_file_out], cache_examples=True, run_on_click=True)
        #ex.dataset.headers = [""]
        
        

    submit_btn.click(fn=infer, inputs=[frame1_inp, frame2_inp], outputs=[flow_img_out, flow_file_out])
    

block.launch()