|
import numpy as np |
|
import torch |
|
from tqdm import tqdm |
|
import math |
|
from einops import rearrange |
|
import sys |
|
sys.path.append(".") |
|
from opensora.eval.flolpips.pwcnet import Network as PWCNet |
|
from opensora.eval.flolpips.flolpips import FloLPIPS |
|
|
|
# Metric models built once at import time and shared across calls:
#  - loss_fn: FloLPIPS perceptual metric (AlexNet backbone, LPIPS v0.1 weights)
#  - flownet: PWC-Net optical-flow estimator
# Both are frozen (eval mode, gradients disabled); calculate_flolpips moves
# them to the requested device on first use.
loss_fn = FloLPIPS(net='alex', version='0.1').eval().requires_grad_(False)

flownet = PWCNet().eval().requires_grad_(False)
|
|
|
def trans(x):
    """Identity transform: return *x* unchanged.

    Placeholder hook so calculate_flolpips can later plug in preprocessing
    (e.g. normalization or axis rearranging) without changing its call sites.
    """
    return x
|
|
|
|
|
def calculate_flolpips(videos1, videos2, device):
    """Compute FloLPIPS between two batches of videos.

    Args:
        videos1: distorted/reconstructed videos, shape (batch, frames, C, H, W).
            Assumed to be in [0, 1] since the metric is called with
            normalize=True — TODO confirm against callers.
        videos2: reference videos, same layout as videos1.
        device: torch device string/object the models and frames are moved to.

    Returns:
        dict with per-timestamp batch mean ("value") and std ("value_std"),
        the raw per-video scores ("result" as ndarray, "details" as nested
        lists), the single-video shape ("video_setting"), and its axis names.
    """
    global loss_fn, flownet

    print("calculate_flowlpips...")
    loss_fn = loss_fn.to(device)
    flownet = flownet.to(device)

    # If clip lengths differ, truncate both to the shorter one so frames align.
    if videos1.shape != videos2.shape:
        print("Warning: the shape of videos are not equal.")
        min_frames = min(videos1.shape[1], videos2.shape[1])
        videos1 = videos1[:, :min_frames]
        videos2 = videos2[:, :min_frames]

    videos1 = trans(videos1)
    videos2 = trans(videos2)

    flolpips_results = []
    # Pure inference: skip autograd graph construction to save memory.
    with torch.no_grad():
        for video_num in tqdm(range(videos1.shape[0])):
            video1 = videos1[video_num].to(device)
            video2 = videos2[video_num].to(device)
            # Consecutive-frame pairs for optical-flow estimation.
            frames_rec = video1[:-1]
            frames_rec_next = video1[1:]
            frames_gt = video2[:-1]
            frames_gt_next = video2[1:]
            flow_gt = flownet(frames_gt, frames_gt_next)
            flow_dis = flownet(frames_rec, frames_rec_next)
            # FloLPIPS weights the perceptual error by the flow discrepancy
            # between reference and distorted motion.
            flow_diff = flow_gt - flow_dis
            flolpips = loss_fn.forward(frames_gt, frames_rec, flow_diff, normalize=True)
            flolpips_results.append(flolpips.cpu().numpy().tolist())

    flolpips_results = np.array(flolpips_results)

    flolpips = {}
    flolpips_std = {}
    # Aggregate over the batch at each frame-pair timestamp. Cast to plain
    # Python floats so the returned dict is JSON-serializable (np.float64
    # breaks json.dumps downstream).
    for clip_timestamp in range(flolpips_results.shape[1]):
        flolpips[clip_timestamp] = float(np.mean(flolpips_results[:, clip_timestamp]))
        flolpips_std[clip_timestamp] = float(np.std(flolpips_results[:, clip_timestamp]))

    result = {
        "value": flolpips,
        "value_std": flolpips_std,
        # Per-video shape (frames, channel, height, width). Derived from the
        # input batch instead of the loop variable, so it is defined even when
        # the batch is empty (the old `video1.shape` raised NameError then).
        "video_setting": videos1.shape[1:],
        "video_setting_name": "time, channel, height, width",  # fixed "heigth" typo
        "result": flolpips_results,
        "details": flolpips_results.tolist(),
    }

    return result
|
|
|
|
|
|
|
def main(): |
|
NUMBER_OF_VIDEOS = 8 |
|
VIDEO_LENGTH = 50 |
|
CHANNEL = 3 |
|
SIZE = 64 |
|
videos1 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False) |
|
videos2 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False) |
|
|
|
import json |
|
result = calculate_flolpips(videos1, videos2, "cuda:0") |
|
print(json.dumps(result, indent=4)) |
|
|
|
if __name__ == "__main__": |
|
main() |