Spaces:
Runtime error
Runtime error
File size: 3,967 Bytes
8832b9b 88a2ed3 8832b9b 88a2ed3 8832b9b 88a2ed3 8832b9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
from pathlib import Path
from PIL import Image
import torch
import yaml
import math
import torchvision.transforms as T
from torchvision.io import read_video,write_video
import os
import random
import numpy as np
from torchvision.io import write_video
# from kornia.filters import joint_bilateral_blur
from kornia.geometry.transform import remap
from kornia.utils.grid import create_meshgrid
import cv2
def save_video_frames(video_path, img_size=(512,512)):
video, _, _ = read_video(video_path, output_format="TCHW")
# rotate video -90 degree if video is .mov format. this is a weird bug in torchvision
if video_path.endswith('.mov'):
video = T.functional.rotate(video, -90)
video_name = Path(video_path).stem
os.makedirs(f'data/{video_name}', exist_ok=True)
for i in range(len(video)):
ind = str(i).zfill(5)
image = T.ToPILImage()(video[i])
image_resized = image.resize((img_size), resample=Image.Resampling.LANCZOS)
image_resized.save(f'data/{video_name}/{ind}.png')
def video_to_frames(video_path, img_size=(512,512)):
video, _, _ = read_video(video_path, output_format="TCHW")
# rotate video -90 degree if video is .mov format. this is a weird bug in torchvision
if video_path.endswith('.mov'):
video = T.functional.rotate(video, -90)
video_name = Path(video_path).stem
# os.makedirs(f'data/{video_name}', exist_ok=True)
frames = []
for i in range(len(video)):
ind = str(i).zfill(5)
image = T.ToPILImage()(video[i])
image_resized = image.resize((img_size), resample=Image.Resampling.LANCZOS)
# image_resized.save(f'data/{video_name}/{ind}.png')
frames.append(image_resized)
return frames
def add_dict_to_yaml_file(file_path, key, value):
data = {}
# If the file already exists, load its contents into the data dictionary
if os.path.exists(file_path):
with open(file_path, 'r') as file:
data = yaml.safe_load(file)
# Add or update the key-value pair
data[key] = value
# Save the data back to the YAML file
with open(file_path, 'w') as file:
yaml.dump(data, file)
def isinstance_str(x: object, cls_name: str):
"""
Checks whether x has any class *named* cls_name in its ancestry.
Doesn't require access to the class's implementation.
Useful for patching!
"""
for _cls in x.__class__.__mro__:
if _cls.__name__ == cls_name:
return True
return False
def batch_cosine_sim(x, y):
if type(x) is list:
x = torch.cat(x, dim=0)
if type(y) is list:
y = torch.cat(y, dim=0)
x = x / x.norm(dim=-1, keepdim=True)
y = y / y.norm(dim=-1, keepdim=True)
similarity = x @ y.T
return similarity
def load_imgs(data_path, n_frames, device='cuda', pil=False):
imgs = []
pils = []
for i in range(n_frames):
img_path = os.path.join(data_path, "%05d.jpg" % i)
if not os.path.exists(img_path):
img_path = os.path.join(data_path, "%05d.png" % i)
img_pil = Image.open(img_path)
pils.append(img_pil)
img = T.ToTensor()(img_pil).unsqueeze(0)
imgs.append(img)
if pil:
return torch.cat(imgs).to(device), pils
return torch.cat(imgs).to(device)
def save_video(raw_frames, save_path, fps=10):
video_codec = "libx264"
video_options = {
"crf": "18", # Constant Rate Factor (lower value = higher quality, 18 is a good balance)
"preset": "slow", # Encoding preset (e.g., ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow)
}
frames = (raw_frames * 255).to(torch.uint8).cpu().permute(0, 2, 3, 1)
write_video(save_path, frames, fps=fps, video_codec=video_codec, options=video_options)
def seed_everything(seed):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
|