import os
import torchvision
import pickle
from typing import Any
import lmdb
import cv2
import imageio
import numpy as np
from PIL import Image
import Imath
import OpenEXR
from pdb import set_trace as st
from pathlib import Path
from functools import partial
import io
import gzip
import random

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torch.utils.data.distributed import DistributedSampler

from guided_diffusion import logger
def load_dataset(
        file_path="",
        reso=64,
        reso_encoder=224,
        batch_size=1,
        # shuffle=True,
        num_workers=6,
        load_depth=False,
        preprocess=None,
        imgnet_normalize=True,
        dataset_size=-1,
        trainer_name='input_rec',
        use_lmdb=False,
        infi_sampler=True):
    # st()
    # dataset_cls = {
    #     'input_rec': MultiViewDataset,
    #     'nv': NovelViewDataset,
    # }[trainer_name]
    # st()

    if use_lmdb:
        logger.log('using LMDB dataset')
        # dataset_cls = LMDBDataset_MV  # 2.5-3 iter/s, but unstable, drops to 1 later.
        if 'nv' in trainer_name:
            dataset_cls = LMDBDataset_NV_Compressed  # 2.5-3 iter/s, but unstable, drops to 1 later.
        else:
            dataset_cls = LMDBDataset_MV_Compressed  # 2.5-3 iter/s, but unstable, drops to 1 later.
        # dataset = dataset_cls(file_path)
    else:
        if 'nv' in trainer_name:
            dataset_cls = NovelViewDataset  # 1.5-2 iter/s
        else:
            dataset_cls = MultiViewDataset

    dataset = dataset_cls(file_path,
                          reso,
                          reso_encoder,
                          test=False,
                          preprocess=preprocess,
                          load_depth=load_depth,
                          imgnet_normalize=imgnet_normalize,
                          dataset_size=dataset_size)

    logger.log('dataset_cls: {}, dataset size: {}'.format(
        trainer_name, len(dataset)))

    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        num_workers=num_workers,
                        drop_last=False,
                        pin_memory=True,
                        persistent_workers=num_workers > 0,
                        shuffle=False)
    return loader
def load_data(
        file_path="",
        reso=64,
        reso_encoder=224,
        batch_size=1,
        # shuffle=True,
        num_workers=6,
        load_depth=False,
        preprocess=None,
        imgnet_normalize=True,
        dataset_size=-1,
        trainer_name='input_rec',
        use_lmdb=False,
        infi_sampler=True):
    # note: because of the `yield` below, calling this function returns a generator.
    # The infi_sampler branch streams batches forever; the else branch is a debugging
    # path (drops into pdb, and its `return` simply ends the generator).
    # st()
    # dataset_cls = {
    #     'input_rec': MultiViewDataset,
    #     'nv': NovelViewDataset,
    # }[trainer_name]
    # st()

    if use_lmdb:
        logger.log('using LMDB dataset')
        # dataset_cls = LMDBDataset_MV  # 2.5-3 iter/s, but unstable, drops to 1 later.
        if 'nv' in trainer_name:
            dataset_cls = LMDBDataset_NV_Compressed  # 2.5-3 iter/s, but unstable, drops to 1 later.
        else:
            dataset_cls = LMDBDataset_MV_Compressed  # 2.5-3 iter/s, but unstable, drops to 1 later.
        # dataset = dataset_cls(file_path)
    else:
        if 'nv' in trainer_name:
            dataset_cls = NovelViewDataset  # 1.5-2 iter/s
        else:
            dataset_cls = MultiViewDataset

    dataset = dataset_cls(file_path,
                          reso,
                          reso_encoder,
                          test=False,
                          preprocess=preprocess,
                          load_depth=load_depth,
                          imgnet_normalize=imgnet_normalize,
                          dataset_size=dataset_size)

    logger.log('dataset_cls: {}, dataset size: {}'.format(
        trainer_name, len(dataset)))
    # st()

    if infi_sampler:
        train_sampler = DistributedSampler(dataset=dataset,
                                           shuffle=True,
                                           drop_last=True)
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            drop_last=True,
                            pin_memory=True,
                            persistent_workers=num_workers > 0,
                            sampler=train_sampler)
        while True:
            yield from loader
    else:
        # loader = DataLoader(dataset,
        #                     batch_size=batch_size,
        #                     num_workers=num_workers,
        #                     drop_last=False,
        #                     pin_memory=True,
        #                     persistent_workers=num_workers > 0,
        #                     shuffle=False)
        st()
        return dataset
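# Usage sketch (paths and step count are illustrative; the infi_sampler path assumes
# torch.distributed has been initialized, since it builds a DistributedSampler):
#
#     data = load_data(file_path='./datasets/chair', batch_size=8,
#                      load_depth=True, trainer_name='input_rec')
#     for step in range(num_steps):
#         batch = next(data)  # dict with 'img_to_encoder', 'img', 'c', 'img_sr', ...
#
# `load_dataset` above is the plain, finite counterpart: it returns a regular DataLoader
# without any distributed sampler.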
def load_eval_rays(file_path="",
                   reso=64,
                   reso_encoder=224,
                   imgnet_normalize=True):
    dataset = MultiViewDataset(file_path,
                               reso,
                               reso_encoder,
                               imgnet_normalize=imgnet_normalize)
    pose_list = dataset.single_pose_list
    ray_list = []
    for pose_fname in pose_list:
        # c2w = dataset.get_c2w(pose_fname).reshape(1, 4, 4)  # [1, 4, 4]
        # rays_o, rays_d = dataset.gen_rays(c2w)
        # ray_list.append(
        #     [rays_o.unsqueeze(0),
        #      rays_d.unsqueeze(0),
        #      c2w.reshape(-1, 16)])
        c2w = dataset.get_c2w(pose_fname).reshape(16)  # [4, 4] -> [16]
        # dataset.intrinsics is a numpy array, so convert it before concatenating.
        c = torch.cat([c2w, torch.from_numpy(dataset.intrinsics)],
                      dim=0).reshape(25)  # 25, no '1' dim needed.
        ray_list.append(c)

    return ray_list
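# Layout of the 25-dim camera vector `c` built above (and in MultiViewDataset.__getitem__):
#   c[:16] -> row-major flattened 4x4 cam2world matrix
#   c[16:] -> row-major flattened 3x3 intrinsics in the [0, 1] uv range.
# Quick sanity-check sketch (the folder path is illustrative):
#
#     rays = load_eval_rays('./datasets/chair', reso=64)
#     assert rays[0].shape == (25,)
#     c2w = rays[0][:16].reshape(4, 4)   # camera-to-world pose
#     K = rays[0][16:].reshape(3, 3)     # K[0, 2] == K[1, 2] == 0.5 (principal point)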
def load_eval_data(file_path="",
                   reso=64,
                   reso_encoder=224,
                   batch_size=1,
                   num_workers=1,
                   load_depth=False,
                   preprocess=None,
                   imgnet_normalize=True,
                   interval=1,
                   **kwargs):
    dataset = MultiViewDataset(file_path,
                               reso,
                               reso_encoder,
                               preprocess=preprocess,
                               load_depth=load_depth,
                               test=True,
                               imgnet_normalize=imgnet_normalize,
                               interval=interval,
                               **kwargs)
    print('eval dataset size: {}'.format(len(dataset)))
    # train_sampler = DistributedSampler(dataset=dataset)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        drop_last=False,
        shuffle=False,
    )
    # sampler=train_sampler)
    return loader
def load_memory_data(file_path="",
                     reso=64,
                     reso_encoder=224,
                     batch_size=1,
                     num_workers=1,
                     load_depth=True,
                     preprocess=None,
                     imgnet_normalize=True):
    # load a single instance into memory to speed up training IO
    # (note: depth is always loaded here, regardless of the load_depth argument)
    dataset = MultiViewDataset(file_path,
                               reso,
                               reso_encoder,
                               preprocess=preprocess,
                               load_depth=True,
                               test=False,
                               overfitting=True,
                               imgnet_normalize=imgnet_normalize,
                               overfitting_bs=batch_size)

    logger.log('!!!!!!! memory dataset size: {} !!!!!!'.format(len(dataset)))
    # train_sampler = DistributedSampler(dataset=dataset)
    loader = DataLoader(
        dataset,
        batch_size=len(dataset),
        num_workers=num_workers,
        drop_last=False,
        shuffle=False,
    )

    all_data: dict = next(iter(loader))
    while True:
        start_idx = np.random.randint(0, len(dataset) - batch_size + 1)
        yield {
            k: v[start_idx:start_idx + batch_size]
            for k, v in all_data.items()
        }
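# Usage sketch for the overfitting/debugging path (path is illustrative): the generator keeps
# the whole single-instance dataset in RAM and yields random contiguous slices of it, so there
# is no per-step disk IO:
#
#     mem_data = load_memory_data('./datasets/chair', batch_size=4)
#     batch = next(mem_data)  # same keys as MultiViewDataset.__getitem__, already batched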
class MultiViewDataset(Dataset):

    def __init__(self,
                 file_path,
                 reso,
                 reso_encoder,
                 preprocess=None,
                 classes=False,
                 load_depth=False,
                 test=False,
                 scene_scale=1,
                 overfitting=False,
                 imgnet_normalize=True,
                 dataset_size=-1,
                 overfitting_bs=-1,
                 interval=1):
        self.file_path = file_path
        self.overfitting = overfitting
        self.scene_scale = scene_scale
        self.reso = reso
        self.reso_encoder = reso_encoder
        self.classes = False
        self.load_depth = load_depth
        self.preprocess = preprocess
        assert not self.classes, "Class conditioning is not supported yet."

        # self.ins_list = os.listdir(self.file_path)
        # if test:  # TODO
        dataset_name = Path(self.file_path).stem.split('_')[0]
        self.dataset_name = dataset_name

        if test:
            # ins_list_file = Path(self.file_path).parent / f'{dataset_name}_test_list.txt'  # ? in domain
            if dataset_name == 'chair':
                self.ins_list = sorted(os.listdir(
                    self.file_path))[1:2]  # more diversity
            else:
                self.ins_list = sorted(os.listdir(self.file_path))[
                    0:1]  # the first instance as the evaluation reference.
        else:
            # self.ins_list = sorted(Path(self.file_path).glob('[0-8]*'))
            # self.ins_list = Path(self.file_path).glob('*')
            # self.ins_list = list(Path(self.file_path).glob('*'))[:dataset_size]
            # ins_list_file = Path(
            #     self.file_path).parent / f'{dataset_name}s_train_list.txt'
            # assert ins_list_file.exists(), 'add training list for ShapeNet'
            # with open(ins_list_file, 'r') as f:
            #     self.ins_list = [name.strip() for name in f.readlines()]
            # if dataset_name == 'chair':
            ins_list_file = Path(
                self.file_path).parent / f'{dataset_name}_train_list.txt'
            # st()
            assert ins_list_file.exists(), 'add training list for ShapeNet'
            with open(ins_list_file, 'r') as f:
                self.ins_list = [name.strip()
                                 for name in f.readlines()][:dataset_size]
            # else:
            #     self.ins_list = Path(self.file_path).glob('*')

        if overfitting:
            self.ins_list = self.ins_list[:1]
        self.rgb_list = []
        self.pose_list = []
        self.depth_list = []
        self.data_ins_list = []
        self.instance_data_length = -1

        for ins in self.ins_list:
            cur_rgb_path = os.path.join(self.file_path, ins, 'rgb')
            cur_pose_path = os.path.join(self.file_path, ins, 'pose')

            cur_all_fname = sorted([
                t.split('.')[0] for t in os.listdir(cur_rgb_path)
                if 'depth' not in t
            ][::interval])

            if self.instance_data_length == -1:
                self.instance_data_length = len(cur_all_fname)
            else:
                assert len(cur_all_fname) == self.instance_data_length

            # ! check filtered data
            # for idx in range(len(cur_all_fname)):
            #     fname = cur_all_fname[idx]
            #     if not Path(os.path.join(cur_rgb_path, fname + '.png')).exists():
            #         cur_all_fname.remove(fname)
            #         del cur_all_fname[idx]

            if test:
                mid_index = len(cur_all_fname) // 3 * 2
                cur_all_fname.insert(0, cur_all_fname[mid_index])

            self.pose_list += ([
                os.path.join(cur_pose_path, fname + '.txt')
                for fname in cur_all_fname
            ])
            self.rgb_list += ([
                os.path.join(cur_rgb_path, fname + '.png')
                for fname in cur_all_fname
            ])
            self.depth_list += ([
                os.path.join(cur_rgb_path, fname + '_depth0001.exr')
                for fname in cur_all_fname
            ])
            self.data_ins_list += ([ins] * len(cur_all_fname))

        # validate overfitting on images
        if overfitting:
            # earlier variants with fixed strides:
            # bs=9: self.pose_list = self.pose_list[::50 // 9 + 1] (same for rgb/depth)
            # bs=6: self.pose_list = self.pose_list[::50 // 6 + 1] (same for rgb/depth)
            # bs=3
            assert overfitting_bs != -1
            # bs=1
            # self.pose_list = self.pose_list[25:26]
            # self.rgb_list = self.rgb_list[25:26]
            # self.depth_list = self.depth_list[25:26]

            # uniform pose sampling
            self.pose_list = self.pose_list[::50 // overfitting_bs + 1]
            self.rgb_list = self.rgb_list[::50 // overfitting_bs + 1]
            self.depth_list = self.depth_list[::50 // overfitting_bs + 1]

            # sequential pose sampling
            # self.pose_list = self.pose_list[25:25 + overfitting_bs]
            # self.rgb_list = self.rgb_list[25:25 + overfitting_bs]
            # self.depth_list = self.depth_list[25:25 + overfitting_bs]

            # duplicate the same pose (e.g. index 25 or 28)
            # self.pose_list = [self.pose_list[25]] * overfitting_bs
            # self.rgb_list = [self.rgb_list[25]] * overfitting_bs
            # self.depth_list = [self.depth_list[25]] * overfitting_bs

        # poses of the last instance in ins_list, used e.g. by load_eval_rays()
        self.single_pose_list = [
            os.path.join(cur_pose_path, fname + '.txt')
            for fname in cur_all_fname
        ]
        # st()
        # if imgnet_normalize:
        transformations = [
            transforms.ToTensor(),  # [0, 1] range
        ]
        if imgnet_normalize:
            transformations.append(
                transforms.Normalize((0.485, 0.456, 0.406),
                                     (0.229, 0.224, 0.225))  # type: ignore
            )
        else:
            transformations.append(
                transforms.Normalize((0.5, 0.5, 0.5),
                                     (0.5, 0.5, 0.5)))  # type: ignore

        self.normalize = transforms.Compose(transformations)
        # self.normalize_normalrange = transforms.Compose([
        #     transforms.ToTensor(),  # [0, 1] range
        #     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        # ])

        fx = fy = 525
        cx = cy = 256  # rendering default K
        factor = self.reso / (cx * 2)  # e.g. 128 / 512
        self.fx = fx * factor
        self.fy = fy * factor
        self.cx = cx * factor
        self.cy = cy * factor

        # ! fix scale for the triplane ray_sampler(): adopt the [0, 1] uv range here,
        # not the [0, w] image-space range.
        self.cx /= self.reso  # 0.5
        self.cy /= self.reso  # 0.5
        self.fx /= self.reso
        self.fy /= self.reso

        intrinsics = np.array([[self.fx, 0, self.cx], [0, self.fy, self.cy],
                               [0, 0, 1]]).reshape(9)
        # self.intrinsics = torch.from_numpy(intrinsics).float()
        self.intrinsics = intrinsics
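        # Worked example of the normalization above (default 512x512 renders, reso = 64):
        #   factor = 64 / 512 = 0.125
        #   fx = 525 * 0.125 / 64 = 525 / 512 ≈ 1.0254   (focal length in uv units)
        #   cx = 256 * 0.125 / 64 = 0.5                  (principal point at the image center)
        # The *factor and /reso steps cancel the chosen `reso`, so the normalized intrinsics
        # depend only on the original 512x512 rendering K.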
    def __len__(self):
        return len(self.rgb_list)
    def get_c2w(self, pose_fname):
        with open(pose_fname, 'r') as f:
            cam2world = f.readline().strip()
            cam2world = [float(t) for t in cam2world.split(' ')]
        c2w = torch.tensor(cam2world, dtype=torch.float32).reshape(4, 4)
        return c2w
    def gen_rays(self, c2w):
        # Generate one ray per pixel of a reso x reso image.
        # Note: self.cx/self.fx are stored in [0, 1] uv units (see __init__), while the grid
        # below is in pixel units; the current training path passes the flattened camera
        # vector `c` to the renderer instead, and the gen_rays() call sites are commented out.
        self.h = self.reso
        self.w = self.reso
        yy, xx = torch.meshgrid(
            torch.arange(self.h, dtype=torch.float32) + 0.5,
            torch.arange(self.w, dtype=torch.float32) + 0.5,
            indexing='ij')
        xx = (xx - self.cx) / self.fx
        yy = (yy - self.cy) / self.fy
        zz = torch.ones_like(xx)
        dirs = torch.stack((xx, yy, zz), dim=-1)  # OpenCV convention
        dirs /= torch.norm(dirs, dim=-1, keepdim=True)
        dirs = dirs.reshape(1, -1, 3, 1)
        del xx, yy, zz

        dirs = (c2w[:, None, :3, :3] @ dirs)[..., 0]
        origins = c2w[:, None, :3, 3].expand(-1, self.h * self.w,
                                             -1).contiguous()
        origins = origins.view(-1, 3)
        dirs = dirs.view(-1, 3)

        return origins, dirs
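    # Shape sketch for gen_rays (illustrative): for a batch of B poses,
    #   c2w: [B, 4, 4]  ->  origins: [B * reso * reso, 3], dirs: [B * reso * reso, 3],
    # i.e. camera centers are broadcast per pixel and per-pixel directions are rotated
    # into world space by the upper-left 3x3 block of each pose.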
    def read_depth(self, idx):
        depth_path = self.depth_list[idx]
        # image_path = os.path.join(depth_fname, self.image_names[index])

        exr = OpenEXR.InputFile(depth_path)
        header = exr.header()
        size = (header['dataWindow'].max.x - header['dataWindow'].min.x + 1,
                header['dataWindow'].max.y - header['dataWindow'].min.y + 1)
        FLOAT = Imath.PixelType(Imath.PixelType.FLOAT)
        depth_str = exr.channel('B', FLOAT)
        depth = np.frombuffer(depth_str,
                              dtype=np.float32).reshape(size[1],
                                                        size[0])  # H W
        depth = np.nan_to_num(depth, posinf=0, neginf=0)
        depth = depth.reshape(size)  # no-op for the square (512x512) renders used here

        def resize_depth_mask(depth_to_resize, resolution):
            depth_resized = cv2.resize(depth_to_resize,
                                       (resolution, resolution),
                                       interpolation=cv2.INTER_LANCZOS4)
            # interpolation=cv2.INTER_AREA
            return depth_resized > 0  # type: ignore

        fg_mask_reso = resize_depth_mask(depth, self.reso)
        fg_mask_sr = resize_depth_mask(depth, 128)

        # depth = cv2.resize(depth, (self.reso, self.reso),
        #                    interpolation=cv2.INTER_LANCZOS4)
        # depth_mask = depth > 0
        # depth = np.expand_dims(depth, axis=0).reshape(size)
        # return torch.from_numpy(depth)

        return torch.from_numpy(depth), torch.from_numpy(
            fg_mask_reso), torch.from_numpy(fg_mask_sr)
    def load_bbox(self, mask):
        # mask: (H, W) boolean foreground mask
        nonzero_value = torch.nonzero(mask)
        # max/min over the nonzero indices give the bottom-right and top-left corners,
        # so the returned bbox is [top, left, bottom, right] in pixel coordinates.
        height, width = nonzero_value.max(dim=0)[0]
        top, left = nonzero_value.min(dim=0)[0]
        bbox = torch.tensor([top, left, height, width], dtype=torch.float32)
        return bbox
    def __getitem__(self, idx):
        rgb_fname = self.rgb_list[idx]
        pose_fname = self.pose_list[idx]
        raw_img = imageio.imread(rgb_fname)

        if self.preprocess is None:
            img_to_encoder = cv2.resize(
                raw_img, (self.reso_encoder, self.reso_encoder),
                interpolation=cv2.INTER_LANCZOS4)
            # interpolation=cv2.INTER_AREA
            img_to_encoder = img_to_encoder[
                ..., :3]  # [reso_encoder, reso_encoder, 3]
            img_to_encoder = self.normalize(img_to_encoder)
        else:
            img_to_encoder = self.preprocess(Image.open(rgb_fname))  # clip

        img = cv2.resize(raw_img, (self.reso, self.reso),
                         interpolation=cv2.INTER_LANCZOS4)
        # interpolation=cv2.INTER_AREA

        # img_sr = cv2.resize(raw_img, (512, 512), interpolation=cv2.INTER_AREA)
        # img_sr = cv2.resize(raw_img, (256, 256), interpolation=cv2.INTER_AREA)  # just as refinement, since eg3d uses 64->128 final resolution
        img_sr = cv2.resize(
            raw_img, (128, 128), interpolation=cv2.INTER_LANCZOS4
        )  # just as refinement, since eg3d uses a 64->128 final resolution

        # img = torch.from_numpy(img)[..., :3].permute(
        #     2, 0, 1) / 255.0  # [3, reso, reso]

        img = torch.from_numpy(img)[..., :3].permute(
            2, 0, 1
        ) / 127.5 - 1  # [3, reso, reso], normalized to [-1, 1] to follow the triplane range
        img_sr = torch.from_numpy(img_sr)[..., :3].permute(
            2, 0, 1
        ) / 127.5 - 1  # [3, 128, 128], normalized to [-1, 1] to follow the triplane range

        # c2w = self.get_c2w(pose_fname).reshape(1, 4, 4)  # [1, 4, 4]
        # rays_o, rays_d = self.gen_rays(c2w)
        # return img_to_encoder, img, rays_o, rays_d, c2w.reshape(-1)

        c2w = self.get_c2w(pose_fname).reshape(16)  # [4, 4] -> [16]
        # c = np.concatenate([c2w, self.intrinsics], axis=0).reshape(25)  # 25, no '1' dim needed.
        c = torch.cat([c2w, torch.from_numpy(self.intrinsics)],
                      dim=0).reshape(25)  # 25, no '1' dim needed.

        ret_dict = {
            # 'rgb_fname': rgb_fname,
            'img_to_encoder': img_to_encoder,
            'img': img,
            'c': c,
            'img_sr': img_sr,
            # 'ins_name': self.data_ins_list[idx]
        }

        if self.load_depth:
            depth, depth_mask, depth_mask_sr = self.read_depth(idx)
            bbox = self.load_bbox(depth_mask)
            ret_dict.update({
                'depth': depth,
                'depth_mask': depth_mask,
                'depth_mask_sr': depth_mask_sr,
                'bbox': bbox
            })

        # rays_o, rays_d = self.gen_rays(c2w)
        # return img_to_encoder, img, c
        return ret_dict
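# Sketch of what one MultiViewDataset sample looks like (keys as built in __getitem__ above;
# the folder path is illustrative):
#
#     ds = MultiViewDataset('./datasets/chair', reso=64, reso_encoder=224, load_depth=True)
#     sample = ds[0]
#     sample['img_to_encoder'].shape  # [3, 224, 224], ImageNet- or [-1, 1]-normalized
#     sample['img'].shape             # [3, 64, 64], in [-1, 1]
#     sample['img_sr'].shape          # [3, 128, 128], in [-1, 1]
#     sample['c'].shape               # [25] = flattened 4x4 c2w + flattened 3x3 intrinsics
#     sample['depth'].shape           # raw EXR depth at the render resolution (512x512 by
#                                     # default), plus 'depth_mask', 'depth_mask_sr', 'bbox'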
class MultiViewDatasetforLMDB(MultiViewDataset):

    def __init__(self,
                 file_path,
                 reso,
                 reso_encoder,
                 preprocess=None,
                 classes=False,
                 load_depth=False,
                 test=False,
                 scene_scale=1,
                 overfitting=False,
                 imgnet_normalize=True,
                 dataset_size=-1,
                 overfitting_bs=-1):
        super().__init__(file_path, reso, reso_encoder, preprocess, classes,
                         load_depth, test, scene_scale, overfitting,
                         imgnet_normalize, dataset_size, overfitting_bs)

    def __len__(self):
        return super().__len__()
        # return 100  # for speed debug

    def __getitem__(self, idx):
        # ret_dict = super().__getitem__(idx)
        rgb_fname = self.rgb_list[idx]
        pose_fname = self.pose_list[idx]
        # keep the alpha channel so the white-background compositing below can run
        raw_img = imageio.imread(rgb_fname)

        if raw_img.shape[-1] == 4:  # ! set bg to white
            alpha_mask = raw_img[..., -1:] / 255
            raw_img = alpha_mask * raw_img[..., :3] + (
                1 - alpha_mask) * np.ones_like(raw_img[..., :3]) * 255
            raw_img = raw_img.astype(np.uint8)
        else:
            raw_img = raw_img[..., :3]

        raw_img = cv2.resize(raw_img, (self.reso, self.reso),
                             interpolation=cv2.INTER_LANCZOS4)

        c2w = self.get_c2w(pose_fname).reshape(16)  # [4, 4] -> [16]
        # c = np.concatenate([c2w, self.intrinsics], axis=0).reshape(25)  # 25, no '1' dim needed.
        c = torch.cat([c2w, torch.from_numpy(self.intrinsics)],
                      dim=0).reshape(25)  # 25, no '1' dim needed.

        depth, depth_mask, depth_mask_sr = self.read_depth(idx)
        bbox = self.load_bbox(depth_mask)
        ret_dict = {
            'raw_img': raw_img,
            'c': c,
            'depth': depth,
            # 'depth_mask': depth_mask,  # 64x64 here?
            'bbox': bbox
        }
        return ret_dict
def load_data_dryrun(
        file_path="",
        reso=64,
        reso_encoder=224,
        batch_size=1,
        # shuffle=True,
        num_workers=6,
        load_depth=False,
        preprocess=None,
        imgnet_normalize=True):
    # st()
    dataset = MultiViewDataset(file_path,
                               reso,
                               reso_encoder,
                               test=False,
                               preprocess=preprocess,
                               load_depth=load_depth,
                               imgnet_normalize=imgnet_normalize)
    print('dataset size: {}'.format(len(dataset)))
    # st()
    # train_sampler = DistributedSampler(dataset=dataset)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        # shuffle=shuffle,
        drop_last=False,
    )
    # sampler=train_sampler)

    return loader
class NovelViewDataset(MultiViewDataset):
    """Novel-view prediction version."""

    def __init__(self,
                 file_path,
                 reso,
                 reso_encoder,
                 preprocess=None,
                 classes=False,
                 load_depth=False,
                 test=False,
                 scene_scale=1,
                 overfitting=False,
                 imgnet_normalize=True,
                 dataset_size=-1,
                 overfitting_bs=-1):
        super().__init__(file_path, reso, reso_encoder, preprocess, classes,
                         load_depth, test, scene_scale, overfitting,
                         imgnet_normalize, dataset_size, overfitting_bs)

    def __getitem__(self, idx):
        input_view = super().__getitem__(idx)  # get the input view

        # get a novel view of the same instance
        novel_view = super().__getitem__(
            (idx // self.instance_data_length) * self.instance_data_length +
            random.randint(0, self.instance_data_length - 1))
        # assert input_view['ins_name'] == novel_view['ins_name'], 'should sample novel view from the same instance'

        input_view.update({f'nv_{k}': v for k, v in novel_view.items()})
        return input_view
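# Sketch: a NovelViewDataset sample carries both views in one flat dict, e.g.
#     {'img': ..., 'c': ..., 'img_sr': ..., 'nv_img': ..., 'nv_c': ..., 'nv_img_sr': ...}
# where the plain keys are the input view and the 'nv_'-prefixed keys are a randomly drawn
# view of the same instance (indices are grouped per instance, instance_data_length apart).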
def load_data_for_lmdb(
        file_path="",
        reso=64,
        reso_encoder=224,
        batch_size=1,
        # shuffle=True,
        num_workers=6,
        load_depth=False,
        preprocess=None,
        imgnet_normalize=True,
        dataset_size=-1,
        trainer_name='input_rec'):
    # st()
    # dataset_cls = {
    #     'input_rec': MultiViewDataset,
    #     'nv': NovelViewDataset,
    # }[trainer_name]
    # if 'nv' in trainer_name:
    #     dataset_cls = NovelViewDataset
    # else:
    #     dataset_cls = MultiViewDataset
    dataset_cls = MultiViewDatasetforLMDB

    dataset = dataset_cls(file_path,
                          reso,
                          reso_encoder,
                          test=False,
                          preprocess=preprocess,
                          load_depth=load_depth,
                          imgnet_normalize=imgnet_normalize,
                          dataset_size=dataset_size)

    logger.log('dataset_cls: {}, dataset size: {}'.format(
        trainer_name, len(dataset)))
    # train_sampler = DistributedSampler(dataset=dataset, shuffle=True, drop_last=True)
    loader = DataLoader(
        dataset,
        shuffle=False,
        batch_size=batch_size,
        num_workers=num_workers,
        drop_last=False,
        prefetch_factor=2,
        # prefetch_factor=3,
        pin_memory=True,
        persistent_workers=True,
    )
    # sampler=train_sampler)

    # while True:
    #     yield from loader
    return loader, dataset.dataset_name, len(dataset)
class LMDBDataset(Dataset):

    def __init__(self, lmdb_path):
        self.env = lmdb.open(
            lmdb_path,
            readonly=True,
            max_readers=32,
            lock=False,
            readahead=False,
            meminit=False,
        )
        self.num_samples = self.env.stat()['entries']
        # self.start_idx = self.env.stat()['start_idx']
        # self.end_idx = self.env.stat()['end_idx']

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        with self.env.begin(write=False) as txn:
            key = str(idx).encode('utf-8')
            value = txn.get(key)

        sample = pickle.loads(value)
        return sample
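# Writer-side sketch (not part of this file; names and paths are hypothetical). LMDBDataset
# expects one pickled dict per integer key, so a compatible export loop could look roughly like:
#
#     env = lmdb.open('./datasets/chair_lmdb', map_size=1 << 40)
#     with env.begin(write=True) as txn:
#         for i, sample in enumerate(dataset):  # e.g. a MultiViewDatasetforLMDB instance
#             txn.put(str(i).encode('utf-8'), pickle.dumps(sample))
#
# The *_Compressed variants below instead store one gzip-compressed buffer per field under
# keys such as '{idx}-raw_img', plus a 'length' entry.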
def resize_depth_mask(depth_to_resize, resolution):
    depth_resized = cv2.resize(depth_to_resize, (resolution, resolution),
                               interpolation=cv2.INTER_LANCZOS4)
    # interpolation=cv2.INTER_AREA
    return depth_resized, depth_resized > 0  # type: ignore
class LMDBDataset_MV(LMDBDataset):

    def __init__(self,
                 lmdb_path,
                 reso,
                 reso_encoder,
                 imgnet_normalize=True,
                 **kwargs):
        super().__init__(lmdb_path)

        self.reso_encoder = reso_encoder
        self.reso = reso

        transformations = [
            transforms.ToTensor(),  # [0, 1] range
        ]
        if imgnet_normalize:
            transformations.append(
                transforms.Normalize((0.485, 0.456, 0.406),
                                     (0.229, 0.224, 0.225))  # type: ignore
            )
        else:
            transformations.append(
                transforms.Normalize((0.5, 0.5, 0.5),
                                     (0.5, 0.5, 0.5)))  # type: ignore
        self.normalize = transforms.Compose(transformations)

    def _post_process_sample(self, raw_img, depth):
        if raw_img.shape[-1] == 4:  # ! set bg to white
            alpha_mask = raw_img[..., -1:] / 255
            raw_img = alpha_mask * raw_img[..., :3] + (
                1 - alpha_mask) * np.ones_like(raw_img[..., :3]) * 255
            raw_img = raw_img.astype(np.uint8)
        # if raw_img.shape[-1] == 4:  # ! set bg to white
        #     raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGBA2RGB)

        # img_to_encoder = cv2.resize(sample.pop('raw_img'), ...)
        if raw_img.shape[0] != self.reso_encoder:
            img_to_encoder = cv2.resize(
                raw_img, (self.reso_encoder, self.reso_encoder),
                interpolation=cv2.INTER_LANCZOS4)
        else:
            img_to_encoder = raw_img
        # interpolation=cv2.INTER_AREA
        # img_to_encoder = img_to_encoder[..., :3]  # [reso_encoder, reso_encoder, 3]

        img_to_encoder = self.normalize(img_to_encoder)

        img = cv2.resize(raw_img, (self.reso, self.reso),
                         interpolation=cv2.INTER_LANCZOS4)
        # if img.shape[-1] == 4:
        #     alpha_mask = img[..., -1:] > 0
        #     img = alpha_mask * img[..., :3] + (1 - alpha_mask) * np.ones_like(img[..., :3]) * 255

        img = torch.from_numpy(img)[..., :3].permute(
            2, 0, 1
        ) / 127.5 - 1  # [3, reso, reso], normalized to [-1, 1] to follow the triplane range
        # img_sr = torch.from_numpy(raw_img)[..., :3].permute(
        #     2, 0, 1
        # ) / 127.5 - 1

        # depth
        # fg_mask_reso = resize_depth_mask(sample['depth'], self.reso)
        depth_reso, fg_mask_reso = resize_depth_mask(depth, self.reso)

        return {
            # **sample,
            'img_to_encoder': img_to_encoder,
            'img': img,
            'depth_mask': fg_mask_reso,
            # 'img_sr': img_sr,
            'depth': depth_reso,
            # ! no need to load img_sr for now
        }

    def __getitem__(self, idx):
        sample = super().__getitem__(idx)
        # do the transformations online
        return self._post_process_sample(sample['raw_img'], sample['depth'])
        # return sample
def load_bytes(inp_bytes, dtype, shape):
    return np.frombuffer(inp_bytes, dtype=dtype).reshape(shape).copy()


# Function to decompress an image using gzip and open it with imageio
def decompress_and_open_image_gzip(compressed_data,
                                   is_img=False,
                                   decompress=True,
                                   decompress_fn=gzip.decompress):
    # Decompress the image data using gzip
    if decompress:
        compressed_data = decompress_fn(compressed_data)

    # Read the decompressed image using imageio
    if is_img:
        compressed_data = imageio.v3.imread(
            io.BytesIO(compressed_data)).copy()
        # return image
    return compressed_data


# Function to decompress an array using gzip
def decompress_array(compressed_data,
                     shape,
                     dtype,
                     decompress=True,
                     decompress_fn=gzip.decompress):
    # Decompress the array data using gzip
    if decompress:
        # compressed_data = gzip.decompress(compressed_data)
        compressed_data = decompress_fn(compressed_data)

    # Convert the decompressed data to a NumPy array
    # arr = np.frombuffer(decompressed_data, dtype=dtype).reshape(shape)
    return load_bytes(compressed_data, dtype, shape)
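# Compression-side sketch (an assumption about how the export script wrote these buffers,
# mirroring the readers above; not part of this file):
#
#     txn.put(f'{idx}-raw_img'.encode('utf-8'), gzip.compress(encoded_png_bytes))
#     txn.put(f'{idx}-depth'.encode('utf-8'),
#             gzip.compress(depth.astype(np.float32).tobytes()))  # (512, 512) float32
#     txn.put(f'{idx}-c'.encode('utf-8'),
#             gzip.compress(c.astype(np.float32).tobytes()))      # (25,) float32
#
# decompress_array() then reverses the gzip + tobytes() round trip via np.frombuffer.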
class LMDBDataset_MV_Compressed(LMDBDataset_MV):

    def __init__(self,
                 lmdb_path,
                 reso,
                 reso_encoder,
                 imgnet_normalize=True,
                 **kwargs):
        super().__init__(lmdb_path, reso, reso_encoder, imgnet_normalize,
                         **kwargs)
        with self.env.begin(write=False) as txn:
            self.length = int(
                txn.get('length'.encode('utf-8')).decode('utf-8')) - 40

        self.load_image_fn = partial(decompress_and_open_image_gzip,
                                     is_img=True)

    def __len__(self):
        return self.length

    def _load_lmdb_data(self, idx):
        with self.env.begin(write=False) as txn:
            raw_img_key = f'{idx}-raw_img'.encode('utf-8')
            raw_img = self.load_image_fn(txn.get(raw_img_key))

            depth_key = f'{idx}-depth'.encode('utf-8')
            depth = decompress_array(txn.get(depth_key), (512, 512),
                                     np.float32)

            c_key = f'{idx}-c'.encode('utf-8')
            c = decompress_array(txn.get(c_key), (25, ), np.float32)

            bbox_key = f'{idx}-bbox'.encode('utf-8')
            bbox = decompress_array(txn.get(bbox_key), (4, ), np.float32)

        return raw_img, depth, c, bbox

    def _load_lmdb_data_no_decompress(self, idx):
        # IO-only variant (kept for benchmarking): fetches the stored buffers without
        # gzip decompression.
        with self.env.begin(write=False) as txn:
            raw_img_key = f'{idx}-raw_img'.encode('utf-8')
            raw_img = self.load_image_fn(txn.get(raw_img_key),
                                         decompress=False)

            depth_key = f'{idx}-depth'.encode('utf-8')
            depth = decompress_array(txn.get(depth_key), (512, 512),
                                     np.float32,
                                     decompress=False)

            c_key = f'{idx}-c'.encode('utf-8')
            c = decompress_array(txn.get(c_key), (25, ),
                                 np.float32,
                                 decompress=False)

            bbox_key = f'{idx}-bbox'.encode('utf-8')
            bbox = decompress_array(txn.get(bbox_key), (4, ),
                                    np.float32,
                                    decompress=False)

        return raw_img, depth, c, bbox

    def __getitem__(self, idx):
        # gzip decompression happens online, per sample
        raw_img, depth, c, bbox = self._load_lmdb_data(idx)
        return {
            **self._post_process_sample(raw_img, depth),
            'c': c,
            'bbox': bbox * (self.reso / 64.0),
            # 'depth': depth,
        }
class LMDBDataset_NV_Compressed(LMDBDataset_MV_Compressed):

    def __init__(self,
                 lmdb_path,
                 reso,
                 reso_encoder,
                 imgnet_normalize=True,
                 **kwargs):
        super().__init__(lmdb_path, reso, reso_encoder, imgnet_normalize,
                         **kwargs)
        self.instance_data_length = 50  # views rendered per instance

    def __getitem__(self, idx):
        input_view = super().__getitem__(idx)  # get the input view

        # get a novel view of the same instance
        try:
            novel_view = super().__getitem__(
                (idx // self.instance_data_length) *
                self.instance_data_length +
                random.randint(0, self.instance_data_length - 1))
        except Exception as e:
            raise NotImplementedError(idx) from e

        # assert input_view['ins_name'] == novel_view['ins_name'], 'should sample novel view from the same instance'
        # (the compressed samples do not carry 'ins_name'; indices are grouped per instance instead)

        input_view.update({f'nv_{k}': v for k, v in novel_view.items()})
        return input_view
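if __name__ == '__main__':
    # Minimal smoke test (the path is a placeholder; assumes a ShapeNet-style render folder
    # with per-instance 'rgb'/'pose' subfolders).
    loader = load_eval_data(file_path='./datasets/chair',
                            reso=64,
                            reso_encoder=224,
                            batch_size=2)
    batch = next(iter(loader))
    for k, v in batch.items():
        print(k, tuple(v.shape))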