SORA-3D

Paused

App Files Files Community

aiqtech committed on Dec 8, 2024

Commit

c371ec2

verified ·

1 Parent(s): 4191268

Upload 5 files

Browse files

Files changed (5) hide show

sf3d/models/sf3d_models_camera.py +32 -0
sf3d/models/sf3d_models_isosurface.py +229 -0
sf3d/models/sf3d_models_mesh.py +172 -0
sf3d/models/sf3d_models_network.py +195 -0
sf3d/models/sf3d_models_utils.py +292 -0

sf3d/models/sf3d_models_camera.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from dataclasses import dataclass, field
+from typing import List
+import torch
+import torch.nn as nn
+from sf3d.models.utils import BaseModule
+class LinearCameraEmbedder(BaseModule):
+    @dataclass
+    class Config(BaseModule.Config):
+        in_channels: int = 25
+        out_channels: int = 768
+        conditions: List[str] = field(default_factory=list)
+    cfg: Config
+    def configure(self) -> None:
+        self.linear = nn.Linear(self.cfg.in_channels, self.cfg.out_channels)
+    def forward(self, **kwargs):
+        cond_tensors = []
+        for cond_name in self.cfg.conditions:
+            assert cond_name in kwargs
+            cond = kwargs[cond_name]
+            # cond in shape (B, Nv, ...)
+            cond_tensors.append(cond.view(*cond.shape[:2], -1))
+        cond_tensor = torch.cat(cond_tensors, dim=-1)
+        assert cond_tensor.shape[-1] == self.cfg.in_channels
+        embedding = self.linear(cond_tensor)
+        return embedding

sf3d/models/sf3d_models_isosurface.py ADDED Viewed

	@@ -0,0 +1,229 @@

+from typing import Optional, Tuple
+import numpy as np
+import torch
+import torch.nn as nn
+from jaxtyping import Float, Integer
+from torch import Tensor
+from .mesh import Mesh
+class IsosurfaceHelper(nn.Module):
+    """Base class for isosurface extraction helpers."""
+    # (low, high) pair; MarchingTetrahedraHelper uses its extent to scale deformations.
+    points_range: Tuple[float, float] = (0, 1)
+    @property
+    def grid_vertices(self) -> Float[Tensor, "N 3"]:
+        """Grid vertex positions; not implemented here, subclasses override it."""
+        raise NotImplementedError
+    @property
+    def requires_instance_per_batch(self) -> bool:
+        """Always ``False`` in this base class."""
+        return False
+class MarchingTetrahedraHelper(IsosurfaceHelper):
+    def __init__(self, resolution: int, tets_path: str):
+        super().__init__()
+        self.resolution = resolution
+        self.tets_path = tets_path
+        self.triangle_table: Float[Tensor, "..."]
+        self.register_buffer(
+            "triangle_table",
+            torch.as_tensor(
+                [
+                    [-1, -1, -1, -1, -1, -1],
+                    [1, 0, 2, -1, -1, -1],
+                    [4, 0, 3, -1, -1, -1],
+                    [1, 4, 2, 1, 3, 4],
+                    [3, 1, 5, -1, -1, -1],
+                    [2, 3, 0, 2, 5, 3],
+                    [1, 4, 0, 1, 5, 4],
+                    [4, 2, 5, -1, -1, -1],
+                    [4, 5, 2, -1, -1, -1],
+                    [4, 1, 0, 4, 5, 1],
+                    [3, 2, 0, 3, 5, 2],
+                    [1, 3, 5, -1, -1, -1],
+                    [4, 1, 2, 4, 3, 1],
+                    [3, 0, 4, -1, -1, -1],
+                    [2, 0, 1, -1, -1, -1],
+                    [-1, -1, -1, -1, -1, -1],
+                ],
+                dtype=torch.long,
+            ),
+            persistent=False,
+        )
+        self.num_triangles_table: Integer[Tensor, "..."]
+        self.register_buffer(
+            "num_triangles_table",
+            torch.as_tensor(
+                [0, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 0], dtype=torch.long
+            ),
+            persistent=False,
+        )
+        self.base_tet_edges: Integer[Tensor, "..."]
+        self.register_buffer(
+            "base_tet_edges",
+            torch.as_tensor([0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3], dtype=torch.long),
+            persistent=False,
+        )
+        tets = np.load(self.tets_path)
+        self._grid_vertices: Float[Tensor, "..."]
+        self.register_buffer(
+            "_grid_vertices",
+            torch.from_numpy(tets["vertices"]).float(),
+            persistent=False,
+        )
+        self.indices: Integer[Tensor, "..."]
+        self.register_buffer(
+            "indices", torch.from_numpy(tets["indices"]).long(), persistent=False
+        )
+        self._all_edges: Optional[Integer[Tensor, "Ne 2"]] = None
+        center_indices, boundary_indices = self.get_center_boundary_index(
+            self._grid_vertices
+        )
+        self.center_indices: Integer[Tensor, "..."]
+        self.register_buffer("center_indices", center_indices, persistent=False)
+        self.boundary_indices: Integer[Tensor, "..."]
+        self.register_buffer("boundary_indices", boundary_indices, persistent=False)
+    def get_center_boundary_index(self, verts):
+        magn = torch.sum(verts**2, dim=-1)
+        center_idx = torch.argmin(magn)
+        boundary_neg = verts == verts.max()
+        boundary_pos = verts == verts.min()
+        boundary = torch.bitwise_or(boundary_pos, boundary_neg)
+        boundary = torch.sum(boundary.float(), dim=-1)
+        boundary_idx = torch.nonzero(boundary)
+        return center_idx, boundary_idx.squeeze(dim=-1)
+    def normalize_grid_deformation(
+        self, grid_vertex_offsets: Float[Tensor, "Nv 3"]
+    ) -> Float[Tensor, "Nv 3"]:
+        return (
+            (self.points_range[1] - self.points_range[0])
+            / self.resolution  # half tet size is approximately 1 / self.resolution
+            * torch.tanh(grid_vertex_offsets)
+        )  # FIXME: hard-coded activation
+    @property
+    def grid_vertices(self) -> Float[Tensor, "Nv 3"]:
+        return self._grid_vertices
+    @property
+    def all_edges(self) -> Integer[Tensor, "Ne 2"]:
+        if self._all_edges is None:
+            # compute edges on GPU, or it would be VERY SLOW (basically due to the unique operation)
+            edges = torch.tensor(
+                [0, 1, 0, 2, 0, 3, 1, 2, 1, 3, 2, 3],
+                dtype=torch.long,
+                device=self.indices.device,
+            )
+            _all_edges = self.indices[:, edges].reshape(-1, 2)
+            _all_edges_sorted = torch.sort(_all_edges, dim=1)[0]
+            _all_edges = torch.unique(_all_edges_sorted, dim=0)
+            self._all_edges = _all_edges
+        return self._all_edges
+    def sort_edges(self, edges_ex2):
+        with torch.no_grad():
+            order = (edges_ex2[:, 0] > edges_ex2[:, 1]).long()
+            order = order.unsqueeze(dim=1)
+            a = torch.gather(input=edges_ex2, index=order, dim=1)
+            b = torch.gather(input=edges_ex2, index=1 - order, dim=1)
+        return torch.stack([a, b], -1)
+    def _forward(self, pos_nx3, sdf_n, tet_fx4):
+        with torch.no_grad():
+            occ_n = sdf_n > 0
+            occ_fx4 = occ_n[tet_fx4.reshape(-1)].reshape(-1, 4)
+            occ_sum = torch.sum(occ_fx4, -1)
+            valid_tets = (occ_sum > 0) & (occ_sum < 4)
+            occ_sum = occ_sum[valid_tets]
+            # find all vertices
+            all_edges = tet_fx4[valid_tets][:, self.base_tet_edges].reshape(-1, 2)
+            all_edges = self.sort_edges(all_edges)
+            unique_edges, idx_map = torch.unique(all_edges, dim=0, return_inverse=True)
+            unique_edges = unique_edges.long()
+            mask_edges = occ_n[unique_edges.reshape(-1)].reshape(-1, 2).sum(-1) == 1
+            mapping = (
+                torch.ones(
+                    (unique_edges.shape[0]), dtype=torch.long, device=pos_nx3.device
+                )
+                * -1
+            )
+            mapping[mask_edges] = torch.arange(
+                mask_edges.sum(), dtype=torch.long, device=pos_nx3.device
+            )
+            idx_map = mapping[idx_map]  # map edges to verts
+            interp_v = unique_edges[mask_edges]
+        edges_to_interp = pos_nx3[interp_v.reshape(-1)].reshape(-1, 2, 3)
+        edges_to_interp_sdf = sdf_n[interp_v.reshape(-1)].reshape(-1, 2, 1)
+        edges_to_interp_sdf[:, -1] *= -1
+        denominator = edges_to_interp_sdf.sum(1, keepdim=True)
+        edges_to_interp_sdf = torch.flip(edges_to_interp_sdf, [1]) / denominator
+        verts = (edges_to_interp * edges_to_interp_sdf).sum(1)
+        idx_map = idx_map.reshape(-1, 6)
+        v_id = torch.pow(2, torch.arange(4, dtype=torch.long, device=pos_nx3.device))
+        tetindex = (occ_fx4[valid_tets] * v_id.unsqueeze(0)).sum(-1)
+        num_triangles = self.num_triangles_table[tetindex]
+        # Generate triangle indices
+        faces = torch.cat(
+            (
+                torch.gather(
+                    input=idx_map[num_triangles == 1],
+                    dim=1,
+                    index=self.triangle_table[tetindex[num_triangles == 1]][:, :3],
+                ).reshape(-1, 3),
+                torch.gather(
+                    input=idx_map[num_triangles == 2],
+                    dim=1,
+                    index=self.triangle_table[tetindex[num_triangles == 2]][:, :6],
+                ).reshape(-1, 3),
+            ),
+            dim=0,
+        )
+        return verts, faces
+    def forward(
+        self,
+        level: Float[Tensor, "N3 1"],
+        deformation: Optional[Float[Tensor, "N3 3"]] = None,
+    ) -> Mesh:
+        if deformation is not None:
+            grid_vertices = self.grid_vertices + self.normalize_grid_deformation(
+                deformation
+            )
+        else:
+            grid_vertices = self.grid_vertices
+        v_pos, t_pos_idx = self._forward(grid_vertices, level, self.indices)
+        mesh = Mesh(
+            v_pos=v_pos,
+            t_pos_idx=t_pos_idx,
+            # extras
+            grid_vertices=grid_vertices,
+            tet_edges=self.all_edges,
+            grid_level=level,
+            grid_deformation=deformation,
+        )
+        return mesh

sf3d/models/sf3d_models_mesh.py ADDED Viewed

	@@ -0,0 +1,172 @@

+from __future__ import annotations
+from typing import Any, Dict, Optional
+import torch
+import torch.nn.functional as F
+from jaxtyping import Float, Integer
+from torch import Tensor
+from sf3d.box_uv_unwrap import box_projection_uv_unwrap
+from sf3d.models.utils import dot
+class Mesh:
+    def __init__(
+        self, v_pos: Float[Tensor, "Nv 3"], t_pos_idx: Integer[Tensor, "Nf 3"], **kwargs
+    ) -> None:
+        self.v_pos: Float[Tensor, "Nv 3"] = v_pos
+        self.t_pos_idx: Integer[Tensor, "Nf 3"] = t_pos_idx
+        self._v_nrm: Optional[Float[Tensor, "Nv 3"]] = None
+        self._v_tng: Optional[Float[Tensor, "Nv 3"]] = None
+        self._v_tex: Optional[Float[Tensor, "Nt 3"]] = None
+        self._edges: Optional[Integer[Tensor, "Ne 2"]] = None
+        self.extras: Dict[str, Any] = {}
+        for k, v in kwargs.items():
+            self.add_extra(k, v)
+    def add_extra(self, k, v) -> None:
+        self.extras[k] = v
+    @property
+    def requires_grad(self):
+        return self.v_pos.requires_grad
+    @property
+    def v_nrm(self):
+        if self._v_nrm is None:
+            self._v_nrm = self._compute_vertex_normal()
+        return self._v_nrm
+    @property
+    def v_tng(self):
+        if self._v_tng is None:
+            self._v_tng = self._compute_vertex_tangent()
+        return self._v_tng
+    @property
+    def v_tex(self):
+        if self._v_tex is None:
+            self.unwrap_uv()
+        return self._v_tex
+    @property
+    def edges(self):
+        if self._edges is None:
+            self._edges = self._compute_edges()
+        return self._edges
+    def _compute_vertex_normal(self):
+        i0 = self.t_pos_idx[:, 0]
+        i1 = self.t_pos_idx[:, 1]
+        i2 = self.t_pos_idx[:, 2]
+        v0 = self.v_pos[i0, :]
+        v1 = self.v_pos[i1, :]
+        v2 = self.v_pos[i2, :]
+        face_normals = torch.cross(v1 - v0, v2 - v0, dim=-1)
+        # Splat face normals to vertices
+        v_nrm = torch.zeros_like(self.v_pos)
+        v_nrm.scatter_add_(0, i0[:, None].repeat(1, 3), face_normals)
+        v_nrm.scatter_add_(0, i1[:, None].repeat(1, 3), face_normals)
+        v_nrm.scatter_add_(0, i2[:, None].repeat(1, 3), face_normals)
+        # Normalize, replace zero (degenerated) normals with some default value
+        v_nrm = torch.where(
+            dot(v_nrm, v_nrm) > 1e-20, v_nrm, torch.as_tensor([0.0, 0.0, 1.0]).to(v_nrm)
+        )
+        v_nrm = F.normalize(v_nrm, dim=1)
+        if torch.is_anomaly_enabled():
+            assert torch.all(torch.isfinite(v_nrm))
+        return v_nrm
+    def _compute_vertex_tangent(self):
+        vn_idx = [None] * 3
+        pos = [None] * 3
+        tex = [None] * 3
+        for i in range(0, 3):
+            pos[i] = self.v_pos[self.t_pos_idx[:, i]]
+            tex[i] = self.v_tex[self.t_pos_idx[:, i]]
+            # t_nrm_idx is always the same as t_pos_idx
+            vn_idx[i] = self.t_pos_idx[:, i]
+        tangents = torch.zeros_like(self.v_nrm)
+        tansum = torch.zeros_like(self.v_nrm)
+        # Compute tangent space for each triangle
+        duv1 = tex[1] - tex[0]
+        duv2 = tex[2] - tex[0]
+        dpos1 = pos[1] - pos[0]
+        dpos2 = pos[2] - pos[0]
+        tng_nom = dpos1 * duv2[..., 1:2] - dpos2 * duv1[..., 1:2]
+        denom = duv1[..., 0:1] * duv2[..., 1:2] - duv1[..., 1:2] * duv2[..., 0:1]
+        # Avoid division by zero for degenerated texture coordinates
+        denom_safe = denom.clip(1e-6)
+        tang = tng_nom / denom_safe
+        # Update all 3 vertices
+        for i in range(0, 3):
+            idx = vn_idx[i][:, None].repeat(1, 3)
+            tangents.scatter_add_(0, idx, tang)  # tangents[n_i] = tangents[n_i] + tang
+            tansum.scatter_add_(
+                0, idx, torch.ones_like(tang)
+            )  # tansum[n_i] = tansum[n_i] + 1
+        # Also normalize it. Here we do not normalize the individual triangles first so larger area
+        # triangles influence the tangent space more
+        tangents = tangents / tansum
+        # Normalize and make sure tangent is perpendicular to normal
+        tangents = F.normalize(tangents, dim=1)
+        tangents = F.normalize(tangents - dot(tangents, self.v_nrm) * self.v_nrm)
+        if torch.is_anomaly_enabled():
+            assert torch.all(torch.isfinite(tangents))
+        return tangents
+    @torch.no_grad()
+    def unwrap_uv(
+        self,
+        island_padding: float = 0.02,
+    ) -> Mesh:
+        uv, indices = box_projection_uv_unwrap(
+            self.v_pos, self.v_nrm, self.t_pos_idx, island_padding
+        )
+        # Do store per vertex UVs.
+        # This means we need to duplicate some vertices at the seams
+        individual_vertices = self.v_pos[self.t_pos_idx].reshape(-1, 3)
+        individual_faces = torch.arange(
+            individual_vertices.shape[0],
+            device=individual_vertices.device,
+            dtype=self.t_pos_idx.dtype,
+        ).reshape(-1, 3)
+        uv_flat = uv[indices].reshape((-1, 2))
+        # uv_flat[:, 1] = 1 - uv_flat[:, 1]
+        self.v_pos = individual_vertices
+        self.t_pos_idx = individual_faces
+        self._v_tex = uv_flat
+        self._v_nrm = self._compute_vertex_normal()
+        self._v_tng = self._compute_vertex_tangent()
+    def _compute_edges(self):
+        # Compute edges
+        edges = torch.cat(
+            [
+                self.t_pos_idx[:, [0, 1]],
+                self.t_pos_idx[:, [1, 2]],
+                self.t_pos_idx[:, [2, 0]],
+            ],
+            dim=0,
+        )
+        edges = edges.sort()[0]
+        edges = torch.unique(edges, dim=0)
+        return edges

sf3d/models/sf3d_models_network.py ADDED Viewed

	@@ -0,0 +1,195 @@

+from dataclasses import dataclass, field
+from typing import Callable, List, Optional
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from jaxtyping import Float
+from torch import Tensor
+from torch.autograd import Function
+from torch.cuda.amp import custom_bwd, custom_fwd
+from sf3d.models.utils import BaseModule, normalize
+class PixelShuffleUpsampleNetwork(BaseModule):
+    @dataclass
+    class Config(BaseModule.Config):
+        in_channels: int = 1024
+        out_channels: int = 40
+        scale_factor: int = 4
+        conv_layers: int = 4
+        conv_kernel_size: int = 3
+    cfg: Config
+    def configure(self) -> None:
+        layers = []
+        output_channels = self.cfg.out_channels * self.cfg.scale_factor**2
+        in_channels = self.cfg.in_channels
+        for i in range(self.cfg.conv_layers):
+            cur_out_channels = (
+                in_channels if i != self.cfg.conv_layers - 1 else output_channels
+            )
+            layers.append(
+                nn.Conv2d(
+                    in_channels,
+                    cur_out_channels,
+                    self.cfg.conv_kernel_size,
+                    padding=(self.cfg.conv_kernel_size - 1) // 2,
+                )
+            )
+            if i != self.cfg.conv_layers - 1:
+                layers.append(nn.ReLU(inplace=True))
+        layers.append(nn.PixelShuffle(self.cfg.scale_factor))
+        self.upsample = nn.Sequential(*layers)
+    def forward(
+        self, triplanes: Float[Tensor, "B 3 Ci Hp Wp"]
+    ) -> Float[Tensor, "B 3 Co Hp2 Wp2"]:
+        return rearrange(
+            self.upsample(
+                rearrange(triplanes, "B Np Ci Hp Wp -> (B Np) Ci Hp Wp", Np=3)
+            ),
+            "(B Np) Co Hp Wp -> B Np Co Hp Wp",
+            Np=3,
+        )
+class _TruncExp(Function):  # pylint: disable=abstract-method
+    # Implementation from torch-ngp:
+    # https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py
+    @staticmethod
+    @custom_fwd(cast_inputs=torch.float32)
+    def forward(ctx, x):  # pylint: disable=arguments-differ
+        ctx.save_for_backward(x)
+        return torch.exp(x)
+    @staticmethod
+    @custom_bwd
+    def backward(ctx, g):  # pylint: disable=arguments-differ
+        x = ctx.saved_tensors[0]
+        return g * torch.exp(torch.clamp(x, max=15))
+trunc_exp = _TruncExp.apply
+def get_activation(name) -> Callable:
+    if name is None:
+        return lambda x: x
+    name = name.lower()
+    if name == "none" or name == "linear" or name == "identity":
+        return lambda x: x
+    elif name == "lin2srgb":
+        return lambda x: torch.where(
+            x > 0.0031308,
+            torch.pow(torch.clamp(x, min=0.0031308), 1.0 / 2.4) * 1.055 - 0.055,
+            12.92 * x,
+        ).clamp(0.0, 1.0)
+    elif name == "exp":
+        return lambda x: torch.exp(x)
+    elif name == "shifted_exp":
+        return lambda x: torch.exp(x - 1.0)
+    elif name == "trunc_exp":
+        return trunc_exp
+    elif name == "shifted_trunc_exp":
+        return lambda x: trunc_exp(x - 1.0)
+    elif name == "sigmoid":
+        return lambda x: torch.sigmoid(x)
+    elif name == "tanh":
+        return lambda x: torch.tanh(x)
+    elif name == "shifted_softplus":
+        return lambda x: F.softplus(x - 1.0)
+    elif name == "scale_-11_01":
+        return lambda x: x * 0.5 + 0.5
+    elif name == "negative":
+        return lambda x: -x
+    elif name == "normalize_channel_last":
+        return lambda x: normalize(x)
+    elif name == "normalize_channel_first":
+        return lambda x: normalize(x, dim=1)
+    else:
+        try:
+            return getattr(F, name)
+        except AttributeError:
+            raise ValueError(f"Unknown activation function: {name}")
+@dataclass
+class HeadSpec:
+    """Description of one output head of ``MaterialMLP``."""
+    # Key under which the head's output is returned.
+    name: str
+    # Output width of the head's final linear layer.
+    out_channels: int
+    # Number of (linear, activation) pairs before the final linear layer.
+    n_hidden_layers: int
+    # Name resolved through ``get_activation``; ``None`` means identity.
+    output_activation: Optional[str] = None
+    # Constant added to the head output before the output activation.
+    out_bias: float = 0.0
+class MaterialMLP(BaseModule):
+    @dataclass
+    class Config(BaseModule.Config):
+        in_channels: int = 120
+        n_neurons: int = 64
+        activation: str = "silu"
+        heads: List[HeadSpec] = field(default_factory=lambda: [])
+    cfg: Config
+    def configure(self) -> None:
+        assert len(self.cfg.heads) > 0
+        heads = {}
+        for head in self.cfg.heads:
+            head_layers = []
+            for i in range(head.n_hidden_layers):
+                head_layers += [
+                    nn.Linear(
+                        self.cfg.in_channels if i == 0 else self.cfg.n_neurons,
+                        self.cfg.n_neurons,
+                    ),
+                    self.make_activation(self.cfg.activation),
+                ]
+            head_layers += [
+                nn.Linear(
+                    self.cfg.n_neurons,
+                    head.out_channels,
+                ),
+            ]
+            heads[head.name] = nn.Sequential(*head_layers)
+        self.heads = nn.ModuleDict(heads)
+    def make_activation(self, activation):
+        if activation == "relu":
+            return nn.ReLU(inplace=True)
+        elif activation == "silu":
+            return nn.SiLU(inplace=True)
+        else:
+            raise NotImplementedError
+    def keys(self):
+        return self.heads.keys()
+    def forward(
+        self, x, include: Optional[List] = None, exclude: Optional[List] = None
+    ):
+        if include is not None and exclude is not None:
+            raise ValueError("Cannot specify both include and exclude.")
+        if include is not None:
+            heads = [h for h in self.cfg.heads if h.name in include]
+        elif exclude is not None:
+            heads = [h for h in self.cfg.heads if h.name not in exclude]
+        else:
+            heads = self.cfg.heads
+        out = {
+            head.name: get_activation(head.output_activation)(
+                self.heads[head.name](x) + head.out_bias
+            )
+            for head in heads
+        }
+        return out

sf3d/models/sf3d_models_utils.py ADDED Viewed

	@@ -0,0 +1,292 @@

+import dataclasses
+import importlib
+import math
+from dataclasses import dataclass
+from typing import Any, List, Optional, Tuple, Union
+import numpy as np
+import PIL
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from jaxtyping import Bool, Float, Int, Num
+from omegaconf import DictConfig, OmegaConf
+from torch import Tensor
+class BaseModule(nn.Module):
+    @dataclass
+    class Config:
+        pass
+    cfg: Config  # add this to every subclass of BaseModule to enable static type checking
+    def __init__(
+        self, cfg: Optional[Union[dict, DictConfig]] = None, *args, **kwargs
+    ) -> None:
+        super().__init__()
+        self.cfg = parse_structured(self.Config, cfg)
+        self.configure(*args, **kwargs)
+    def configure(self, *args, **kwargs) -> None:
+        raise NotImplementedError
+def find_class(cls_string):
+    module_string = ".".join(cls_string.split(".")[:-1])
+    cls_name = cls_string.split(".")[-1]
+    module = importlib.import_module(module_string, package=None)
+    cls = getattr(module, cls_name)
+    return cls
+def parse_structured(fields: Any, cfg: Optional[Union[dict, DictConfig]] = None) -> Any:
+    # Check if cfg.keys are in fields
+    cfg_ = cfg.copy()
+    keys = list(cfg_.keys())
+    field_names = {f.name for f in dataclasses.fields(fields)}
+    for key in keys:
+        # This is helpful when swapping out modules from CLI
+        if key not in field_names:
+            print(f"Ignoring {key} as it's not supported by {fields}")
+            cfg_.pop(key)
+    scfg = OmegaConf.merge(OmegaConf.structured(fields), cfg_)
+    return scfg
+# Default epsilon per floating-point dtype; ``normalize`` looks its ``eps`` up here
+# when the caller does not pass one.
+EPS_DTYPE = {
+    torch.float16: 1e-4,
+    torch.bfloat16: 1e-4,
+    torch.float32: 1e-7,
+    torch.float64: 1e-8,
+}
+def dot(x, y, dim=-1):
+    return torch.sum(x * y, dim, keepdim=True)
+def reflect(x, n):
+    return x - 2 * dot(x, n) * n
+def normalize(x, dim=-1, eps=None):
+    if eps is None:
+        eps = EPS_DTYPE[x.dtype]
+    return F.normalize(x, dim=dim, p=2, eps=eps)
+def tri_winding(tri: Float[Tensor, "*B 3 2"]) -> Float[Tensor, "*B 3 3"]:
+    # One pad for determinant
+    tri_sq = F.pad(tri, (0, 1), "constant", 1.0)
+    det_tri = torch.det(tri_sq)
+    tri_rev = torch.cat(
+        (tri_sq[..., 0:1, :], tri_sq[..., 2:3, :], tri_sq[..., 1:2, :]), -2
+    )
+    tri_sq[det_tri < 0] = tri_rev[det_tri < 0]
+    return tri_sq
+def triangle_intersection_2d(
+    t1: Float[Tensor, "*B 3 2"],
+    t2: Float[Tensor, "*B 3 2"],
+    eps=1e-12,
+) -> Float[Tensor, "*B"]:  # noqa: F821
+    """Returns True if triangles collide, False otherwise"""
+    def chk_edge(x: Float[Tensor, "*B 3 3"]) -> Bool[Tensor, "*B"]:  # noqa: F821
+        logdetx = torch.logdet(x.double())
+        if eps is None:
+            return ~torch.isfinite(logdetx)
+        return ~(torch.isfinite(logdetx) & (logdetx > math.log(eps)))
+    t1s = tri_winding(t1)
+    t2s = tri_winding(t2)
+    # Assume the triangles do not collide in the begging
+    ret = torch.zeros(t1.shape[0], dtype=torch.bool, device=t1.device)
+    for i in range(3):
+        edge = torch.roll(t1s, i, dims=1)[:, :2, :]
+        # Check if all points of triangle 2 lay on the external side of edge E.
+        # If this is the case the triangle do not collide
+        upd = (
+            chk_edge(torch.cat((edge, t2s[:, 0:1]), 1))
+            & chk_edge(torch.cat((edge, t2s[:, 1:2]), 1))
+            & chk_edge(torch.cat((edge, t2s[:, 2:3]), 1))
+        )
+        # Here no collision is still True due to inversion
+        ret = ret | upd
+    for i in range(3):
+        edge = torch.roll(t2s, i, dims=1)[:, :2, :]
+        upd = (
+            chk_edge(torch.cat((edge, t1s[:, 0:1]), 1))
+            & chk_edge(torch.cat((edge, t1s[:, 1:2]), 1))
+            & chk_edge(torch.cat((edge, t1s[:, 2:3]), 1))
+        )
+        # Here no collision is still True due to inversion
+        ret = ret | upd
+    return ~ret  # Do the inversion
+ValidScale = Union[Tuple[float, float], Num[Tensor, "2 D"]]
+def scale_tensor(
+    dat: Num[Tensor, "... D"], inp_scale: ValidScale, tgt_scale: ValidScale
+):
+    if inp_scale is None:
+        inp_scale = (0, 1)
+    if tgt_scale is None:
+        tgt_scale = (0, 1)
+    if isinstance(tgt_scale, Tensor):
+        assert dat.shape[-1] == tgt_scale.shape[-1]
+    dat = (dat - inp_scale[0]) / (inp_scale[1] - inp_scale[0])
+    dat = dat * (tgt_scale[1] - tgt_scale[0]) + tgt_scale[0]
+    return dat
+def dilate_fill(img, mask, iterations=10):
+    oldMask = mask.float()
+    oldImg = img
+    mask_kernel = torch.ones(
+        (1, 1, 3, 3),
+        dtype=oldMask.dtype,
+        device=oldMask.device,
+    )
+    for i in range(iterations):
+        newMask = torch.nn.functional.max_pool2d(oldMask, 3, 1, 1)
+        # Fill the extension with mean color of old valid regions
+        img_unfold = F.unfold(oldImg, (3, 3)).view(1, 3, 3 * 3, -1)
+        mask_unfold = F.unfold(oldMask, (3, 3)).view(1, 1, 3 * 3, -1)
+        new_mask_unfold = F.unfold(newMask, (3, 3)).view(1, 1, 3 * 3, -1)
+        # Average color of the valid region
+        mean_color = (img_unfold.sum(dim=2) / mask_unfold.sum(dim=2).clip(1)).unsqueeze(
+            2
+        )
+        # Extend it to the new region
+        fill_color = (mean_color * new_mask_unfold).view(1, 3 * 3 * 3, -1)
+        mask_conv = F.conv2d(
+            newMask, mask_kernel, padding=1
+        )  # Get the sum for each kernel patch
+        newImg = F.fold(
+            fill_color, (img.shape[-2], img.shape[-1]), (3, 3)
+        ) / mask_conv.clamp(1)
+        diffMask = newMask - oldMask
+        oldMask = newMask
+        oldImg = torch.lerp(oldImg, newImg, diffMask)
+    return oldImg
+def float32_to_uint8_np(
+    x: Float[np.ndarray, "*B H W C"],
+    dither: bool = True,
+    dither_mask: Optional[Float[np.ndarray, "*B H W C"]] = None,
+    dither_strength: float = 1.0,
+) -> Int[np.ndarray, "*B H W C"]:
+    if dither:
+        dither = (
+            dither_strength * np.random.rand(*x[..., :1].shape).astype(np.float32) - 0.5
+        )
+        if dither_mask is not None:
+            dither = dither * dither_mask
+        return np.clip(np.floor((256.0 * x + dither)), 0, 255).astype(np.uint8)
+    return np.clip(np.floor((256.0 * x)), 0, 255).astype(torch.uint8)
+def convert_data(data):
+    if data is None:
+        return None
+    elif isinstance(data, np.ndarray):
+        return data
+    elif isinstance(data, torch.Tensor):
+        if data.dtype in [torch.float16, torch.bfloat16]:
+            data = data.float()
+        return data.detach().cpu().numpy()
+    elif isinstance(data, list):
+        return [convert_data(d) for d in data]
+    elif isinstance(data, dict):
+        return {k: convert_data(v) for k, v in data.items()}
+    else:
+        raise TypeError(
+            "Data must be in type numpy.ndarray, torch.Tensor, list or dict, getting",
+            type(data),
+        )
+class ImageProcessor:
+    def convert_and_resize(
+        self,
+        image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
+        size: int,
+    ):
+        if isinstance(image, PIL.Image.Image):
+            image = torch.from_numpy(np.array(image).astype(np.float32) / 255.0)
+        elif isinstance(image, np.ndarray):
+            if image.dtype == np.uint8:
+                image = torch.from_numpy(image.astype(np.float32) / 255.0)
+            else:
+                image = torch.from_numpy(image)
+        elif isinstance(image, torch.Tensor):
+            pass
+        batched = image.ndim == 4
+        if not batched:
+            image = image[None, ...]
+        image = F.interpolate(
+            image.permute(0, 3, 1, 2),
+            (size, size),
+            mode="bilinear",
+            align_corners=False,
+            antialias=True,
+        ).permute(0, 2, 3, 1)
+        if not batched:
+            image = image[0]
+        return image
+    def __call__(
+        self,
+        image: Union[
+            PIL.Image.Image,
+            np.ndarray,
+            torch.FloatTensor,
+            List[PIL.Image.Image],
+            List[np.ndarray],
+            List[torch.FloatTensor],
+        ],
+        size: int,
+    ) -> Any:
+        if isinstance(image, (np.ndarray, torch.FloatTensor)) and image.ndim == 4:
+            image = self.convert_and_resize(image, size)
+        else:
+            if not isinstance(image, list):
+                image = [image]
+            image = [self.convert_and_resize(im, size) for im in image]
+            image = torch.stack(image, dim=0)
+        return image
+def get_intrinsic_from_fov(fov, H, W, bs=-1):
+    focal_length = 0.5 * H / np.tan(0.5 * fov)
+    intrinsic = np.identity(3, dtype=np.float32)
+    intrinsic[0, 0] = focal_length
+    intrinsic[1, 1] = focal_length
+    intrinsic[0, 2] = W / 2.0
+    intrinsic[1, 2] = H / 2.0
+    if bs > 0:
+        intrinsic = intrinsic[None].repeat(bs, axis=0)
+    return torch.from_numpy(intrinsic)