SORA-3D

Paused

App Files Files Community

aiqtech committed on Dec 8, 2024

Commit

317ed10

verified ·

1 Parent(s): 45e40db

Upload 5 files

Browse files

Files changed (5) hide show

sf3d/sf3d_box_uv_unwrap.py +610 -0
sf3d/sf3d_system.py +482 -0
sf3d/sf3d_texture_baker.py +87 -0
sf3d/sf3d_texture_baker.slang +93 -0
sf3d/sf3d_utils.py +91 -0

sf3d/sf3d_box_uv_unwrap.py ADDED Viewed

	@@ -0,0 +1,610 @@

+import math
+from typing import Tuple
+import torch
+import torch.nn.functional as F
+from jaxtyping import Float, Integer
+from torch import Tensor
+from sf3d.models.utils import dot, triangle_intersection_2d
+def _box_assign_vertex_to_cube_face(
+    vertex_positions: Float[Tensor, "Nv 3"],
+    vertex_normals: Float[Tensor, "Nv 3"],
+    triangle_idxs: Integer[Tensor, "Nf 3"],
+    bbox: Float[Tensor, "2 3"],
+) -> Tuple[Float[Tensor, "Nf 3 2"], Integer[Tensor, "Nf 3"]]:
+    """Assign every triangle to one of six cube faces and project it to UV.
+    Positions are normalized to [-1, 1] using ``bbox``. The cube face is picked
+    from the summed (then normalized) corner normals via an argmax over the six
+    signed axis directions. Returns the per-corner UVs clipped to [0, 1] and the
+    chosen cube-face id (0..5 = +x, -x, +y, -y, +z, -z).
+    NOTE(review): ``index`` comes from an argmax over a (Nf, 6) tensor, so it
+    holds one id per face even though the return annotation reads "Nf 3".
+    """
+    # Test to not have a scaled model to fit the space better
+    # bbox_min = bbox[:1].mean(-1, keepdim=True)
+    # bbox_max = bbox[1:].mean(-1, keepdim=True)
+    # v_pos_normalized = (vertex_positions - bbox_min) / (bbox_max - bbox_min)
+    # Create a [0, 1] normalized vertex position
+    # NOTE(review): a bbox axis with zero extent makes this a division by zero
+    v_pos_normalized = (vertex_positions - bbox[:1]) / (bbox[1:] - bbox[:1])
+    # And to [-1, 1]
+    v_pos_normalized = 2.0 * v_pos_normalized - 1.0
+    # Get all vertex positions for each triangle
+    # Now how do we define to which face the triangle belongs? Mean face pos? Max vertex pos?
+    v0 = v_pos_normalized[triangle_idxs[:, 0]]
+    v1 = v_pos_normalized[triangle_idxs[:, 1]]
+    v2 = v_pos_normalized[triangle_idxs[:, 2]]
+    tri_stack = torch.stack([v0, v1, v2], dim=1)
+    vn0 = vertex_normals[triangle_idxs[:, 0]]
+    vn1 = vertex_normals[triangle_idxs[:, 1]]
+    vn2 = vertex_normals[triangle_idxs[:, 2]]
+    tri_stack_nrm = torch.stack([vn0, vn1, vn2], dim=1)
+    # Just average the normals per face
+    face_normal = F.normalize(torch.sum(tri_stack_nrm, 1), eps=1e-6, dim=-1)
+    # Now decide based on the face normal in which box map we project
+    # abs_x, abs_y, abs_z = tri_stack_nrm.abs().unbind(-1)
+    # Per-corner absolute normalized coordinates, split by axis
+    abs_x, abs_y, abs_z = tri_stack.abs().unbind(-1)
+    # The six signed axis directions; the row order defines the cube-face id
+    axis = torch.tensor(
+        [
+            [1, 0, 0],  # 0
+            [-1, 0, 0],  # 1
+            [0, 1, 0],  # 2
+            [0, -1, 0],  # 3
+            [0, 0, 1],  # 4
+            [0, 0, -1],  # 5
+        ],
+        device=face_normal.device,
+        dtype=face_normal.dtype,
+    )
+    # Dot product of every face normal with every axis direction; pick the best match
+    face_normal_axis = (face_normal[:, None] * axis[None]).sum(-1)
+    index = face_normal_axis.argmax(-1)
+    # max_axis: magnitude along the projection axis; uc / vc: raw in-plane coordinates
+    max_axis, uc, vc = (
+        torch.ones_like(abs_x),
+        torch.zeros_like(tri_stack[..., :1]),
+        torch.zeros_like(tri_stack[..., :1]),
+    )
+    # +x / -x faces: u = y, v = -z
+    mask_pos_x = index == 0
+    max_axis[mask_pos_x] = abs_x[mask_pos_x]
+    uc[mask_pos_x] = tri_stack[mask_pos_x][..., 1:2]
+    vc[mask_pos_x] = -tri_stack[mask_pos_x][..., -1:]
+    mask_neg_x = index == 1
+    max_axis[mask_neg_x] = abs_x[mask_neg_x]
+    uc[mask_neg_x] = tri_stack[mask_neg_x][..., 1:2]
+    vc[mask_neg_x] = -tri_stack[mask_neg_x][..., -1:]
+    # +y / -y faces: u = x, v = -z
+    mask_pos_y = index == 2
+    max_axis[mask_pos_y] = abs_y[mask_pos_y]
+    uc[mask_pos_y] = tri_stack[mask_pos_y][..., 0:1]
+    vc[mask_pos_y] = -tri_stack[mask_pos_y][..., -1:]
+    mask_neg_y = index == 3
+    max_axis[mask_neg_y] = abs_y[mask_neg_y]
+    uc[mask_neg_y] = tri_stack[mask_neg_y][..., 0:1]
+    vc[mask_neg_y] = -tri_stack[mask_neg_y][..., -1:]
+    # +z face: u = x, v = y; -z face: u = x, v = -y
+    mask_pos_z = index == 4
+    max_axis[mask_pos_z] = abs_z[mask_pos_z]
+    uc[mask_pos_z] = tri_stack[mask_pos_z][..., 0:1]
+    vc[mask_pos_z] = tri_stack[mask_pos_z][..., 1:2]
+    mask_neg_z = index == 5
+    max_axis[mask_neg_z] = abs_z[mask_neg_z]
+    uc[mask_neg_z] = tri_stack[mask_neg_z][..., 0:1]
+    vc[mask_neg_z] = -tri_stack[mask_neg_z][..., 1:2]
+    # UC from [-1, 1] to [0, 1]
+    # The divisor is the largest projection-axis magnitude over all faces
+    # (the max runs over dim 0, so one value is kept per corner slot)
+    max_dim_div = max_axis.max(dim=0, keepdims=True).values
+    uc = ((uc[..., 0] / max_dim_div + 1.0) * 0.5).clip(0, 1)
+    vc = ((vc[..., 0] / max_dim_div + 1.0) * 0.5).clip(0, 1)
+    uv = torch.stack([uc, vc], dim=-1)
+    return uv, index
+def _assign_faces_uv_to_atlas_index(
+    vertex_positions: Float[Tensor, "Nv 3"],
+    triangle_idxs: Integer[Tensor, "Nf 3"],
+    face_uv: Float[Tensor, "Nf 3 2"],
+    face_index: Integer[Tensor, "Nf 3"],
+) -> Integer[Tensor, "Nf"]:  # noqa: F821
+    """Resolve UV overlaps by moving occluded faces to later atlas slices.
+    Starts from the cube-face id of every face and runs three passes. In each
+    pass, faces whose projected UV triangle intersects another triangle of the
+    same slice and that lie behind it along the projection axis get their id
+    shifted by 6. The result is clamped to [0, 12].
+    NOTE(review): ``face_index`` is used as a boolean-mask source for
+    ``face_uv`` / ``triangle_pos`` along dim 0, so it is presumably shaped
+    (Nf,) rather than the annotated "Nf 3" - confirm against the caller.
+    """
+    triangle_pos = vertex_positions[triangle_idxs]
+    # We need to perform 3 overlap checks.
+    # The first set is placed in the upper two thirds of the UV atlas.
+    # Conceptually, these are the directly visible surfaces from each cube side.
+    # The second set is placed in the lower third and the left half of the UV atlas.
+    # This is the first set of occluded surfaces. They will also be saved in the projected fashion.
+    # The third pass finds all non-assigned faces. They will be placed in the bottom right half of
+    # the UV atlas in scattered fashion.
+    assign_idx = face_index.clone()
+    for overlap_step in range(3):
+        overlapping_indicator = torch.zeros_like(assign_idx, dtype=torch.bool)
+        # Only the six ids that belong to the current slice are inspected in this pass
+        for i in range(overlap_step * 6, (overlap_step + 1) * 6):
+            mask = assign_idx == i
+            if not mask.any():
+                continue
+            # Get all elements belonging to the projection face
+            uv_triangle = face_uv[mask]
+            cur_triangle_pos = triangle_pos[mask]
+            # Find the center of the uv coordinates
+            center_uv = uv_triangle.mean(dim=1, keepdim=True)
+            # And also the radius of the triangle
+            uv_triangle_radius = (uv_triangle - center_uv).norm(dim=-1).max(-1).values
+            # NOTE: this builds a dense pairwise distance matrix over the faces of the slice
+            potentially_overlapping_mask = (
+                # Find all close triangles
+                (center_uv[None, ...] - center_uv[:, None]).norm(dim=-1)
+                # Do not select the same element by offsetting with a large-valued identity matrix
+                + torch.eye(
+                    uv_triangle.shape[0],
+                    device=uv_triangle.device,
+                    dtype=uv_triangle.dtype,
+                ).unsqueeze(-1)
+                * 1000
+            )
+            # Mark all potentially overlapping triangles to reduce the number of triangle intersection tests
+            potentially_overlapping_mask = (
+                potentially_overlapping_mask
+                <= (uv_triangle_radius.view(-1, 1, 1) * 3.0)
+            ).squeeze(-1)
+            overlap_coords = torch.stack(torch.where(potentially_overlapping_mask), -1)
+            # Only unique triangles (A|B and B|A should be the same)
+            f = torch.min(overlap_coords, dim=-1).values
+            s = torch.max(overlap_coords, dim=-1).values
+            overlap_coords = torch.unique(torch.stack([f, s], dim=1), dim=0)
+            first, second = overlap_coords.unbind(-1)
+            # Get the triangles
+            tri_1 = uv_triangle[first]
+            tri_2 = uv_triangle[second]
+            # Perform the actual test with the reduced number of potentially overlapping triangles
+            its = triangle_intersection_2d(tri_1, tri_2, eps=1e-6)
+            # So we now need to detect which triangles are the occluded ones.
+            # We always assume the first to be the visible one (the others should move)
+            # In the previous step we use a lexicographical sort to get the unique pairs
+            # In this we use a sort based on the orthographic projection
+            # Ids 0-1 project along x, 2-3 along y, 4-5 along z (repeating every 6 is not
+            # applied here: `i` is the raw slice id, so later slices compare along z)
+            ax = 0 if i < 2 else 1 if i < 4 else 2
+            use_max = i % 2 == 1
+            tri1_c = cur_triangle_pos[first].mean(dim=1)
+            tri2_c = cur_triangle_pos[second].mean(dim=1)
+            mark_first = (
+                (tri1_c[..., ax] > tri2_c[..., ax])
+                if use_max
+                else (tri1_c[..., ax] < tri2_c[..., ax])
+            )
+            # After this swap `first` holds the face of each pair that may be moved
+            first[mark_first] = second[mark_first]
+            # Lastly the same index can be tested multiple times.
+            # If one marks it as overlapping we keep it marked as such.
+            # We do this by testing if it has been marked at least once.
+            unique_idx, rev_idx = torch.unique(first, return_inverse=True)
+            add = torch.zeros_like(unique_idx, dtype=torch.float32)
+            add.index_add_(0, rev_idx, its.float())
+            its_mask = add > 0
+            # And fill it in the overlapping indicator
+            idx = torch.where(mask)[0][unique_idx]
+            overlapping_indicator[idx] = its_mask
+        # Move the index to the overlap regions (shift by 6)
+        assign_idx[overlapping_indicator] += 6
+    # We do not care about the correct face placement after the first 2 slices
+    max_idx = 6 * 2
+    return assign_idx.clamp(0, max_idx)
+def _find_slice_offset_and_scale(
+    index: Integer[Tensor, "Nf"],  # noqa: F821
+) -> Tuple[
+    Float[Tensor, "Nf"], Float[Tensor, "Nf"], Float[Tensor, "Nf"], Float[Tensor, "Nf"]  # noqa: F821
+]:  # noqa: F821
+    """Return per-face atlas offsets (x, y) and UV divisors (x, y).
+    Ids 0-5 occupy a 3x2 grid of 1/3-sized cells. Ids >= 6 occupy a 3x2 grid
+    of 1/6-sized cells that starts at y = 2/3, shifted right by 0.5 for ids
+    >= 12. The divisors are 3 for the main grid, 6 for the first overlap grid
+    and (2, 3) for ids >= 12.
+    """
+    main_cell = 1 / 3
+    overlap_cell = 1 / 6
+    def cell_origin(atlas_id):
+        # Column / row of the cube face inside its 3x2 grid
+        col, row = atlas_id % 3, (atlas_id % 6) // 3
+        overlap_level = atlas_id // 6
+        if overlap_level == 0:
+            # Main slice - plain 3x2 grid
+            return main_cell * col, main_cell * row
+        # Smaller 3x2 grid in the lowest row, moved right for the second overlap
+        x_origin = overlap_cell * col + min(overlap_level - 1, 1) * 0.5
+        y_origin = overlap_cell * row + main_cell * 2
+        return x_origin, y_origin
+    offset_x = torch.zeros_like(index, dtype=torch.float32)
+    offset_y = torch.zeros_like(index, dtype=torch.float32)
+    for atlas_id in range(index.max().item() + 1):
+        selected = index == atlas_id
+        if selected.any():
+            offset_x[selected], offset_y[selected] = cell_origin(atlas_id)
+    is_overlap = index >= 6
+    is_scattered = index >= 12
+    # The main slice is stored at one third scale, overlaps at one sixth
+    div_x = torch.full_like(index, 3, dtype=torch.float32)
+    div_x[is_overlap] = 6
+    div_y = div_x.clone()
+    # The scattered faces share one block that is half wide and a third high
+    div_x[is_scattered] = 2
+    div_y[is_scattered] = 3
+    return offset_x, offset_y, div_x, div_y
+def rotation_flip_matrix_2d(
+    rad: float, flip_x: bool = False, flip_y: bool = False
+) -> Float[Tensor, "2 2"]:
+    """Build a float32 2x2 matrix that rotates by ``rad`` and then flips axes."""
+    c, s = math.cos(rad), math.sin(rad)
+    sign_x = -1 if flip_x else 1
+    sign_y = -1 if flip_y else 1
+    rotation = torch.tensor([[c, -s], [s, c]], dtype=torch.float32)
+    flip = torch.tensor([[sign_x, 0], [0, sign_y]], dtype=torch.float32)
+    # The flip is applied after the rotation
+    return flip @ rotation
+def calculate_tangents(
+    vertex_positions: Float[Tensor, "Nv 3"],
+    vertex_normals: Float[Tensor, "Nv 3"],
+    triangle_idxs: Integer[Tensor, "Nf 3"],
+    face_uv: Float[Tensor, "Nf 3 2"],
+) -> Float[Tensor, "Nv 3"]:  # noqa: F821
+    """Compute one tangent per vertex from triangle positions and UVs.
+    A tangent is computed per triangle, accumulated onto the triangle's three
+    vertices, averaged, normalized and finally made perpendicular to the vertex
+    normal. Vertices that no triangle references receive a zero tangent.
+    """
+    corner_idx = triangle_idxs.unbind(1)
+    pos = [vertex_positions[idx] for idx in corner_idx]
+    tex = face_uv.unbind(1)
+    tangents = torch.zeros_like(vertex_normals)
+    tansum = torch.zeros_like(vertex_normals)
+    # Compute tangent space for each triangle
+    duv1 = tex[1] - tex[0]
+    duv2 = tex[2] - tex[0]
+    dpos1 = pos[1] - pos[0]
+    dpos2 = pos[2] - pos[0]
+    tng_nom = dpos1 * duv2[..., 1:2] - dpos2 * duv1[..., 1:2]
+    denom = duv1[..., 0:1] * duv2[..., 1:2] - duv1[..., 1:2] * duv2[..., 0:1]
+    # Avoid division by zero for degenerated texture coordinates while keeping
+    # the sign: mirrored UV triangles have a negative determinant, and clamping
+    # them to +1e-6 would flip and blow up their tangent
+    denom_safe = torch.where(
+        denom >= 0, denom.clamp(min=1e-6), denom.clamp(max=-1e-6)
+    )
+    tang = tng_nom / denom_safe
+    ones = torch.ones_like(tang)
+    # Update all 3 vertices
+    for vert_idx in corner_idx:
+        idx = vert_idx[:, None].repeat(1, 3)
+        tangents.scatter_add_(0, idx, tang)  # tangents[n_i] = tangents[n_i] + tang
+        tansum.scatter_add_(0, idx, ones)  # tansum[n_i] = tansum[n_i] + 1
+    # Also normalize it. Here we do not normalize the individual triangles first so larger area
+    # triangles influence the tangent space more.
+    # Clamp the count so vertices without any triangle give 0 instead of 0 / 0 = NaN
+    tangents = tangents / tansum.clamp(min=1)
+    # Normalize and make sure tangent is perpendicular to normal
+    tangents = F.normalize(tangents, dim=1)
+    tangents = F.normalize(
+        tangents - dot(tangents, vertex_normals) * vertex_normals, dim=1
+    )
+    return tangents
+def _rotate_uv_slices_consistent_space(
+    vertex_positions: Float[Tensor, "Nv 3"],
+    vertex_normals: Float[Tensor, "Nv 3"],
+    triangle_idxs: Integer[Tensor, "Nf 3"],
+    uv: Float[Tensor, "Nf 3 2"],
+    index: Integer[Tensor, "Nf"],  # noqa: F821
+) -> Float[Tensor, "Nf 3 2"]:
+    """Rotate the UVs of every cube-face slice towards a shared tangent frame.
+    For each of the six slices the mean UV-derived tangent is compared with the
+    mean of an expected tangent built from the vertex normals and the vector
+    (-y, x, 0). The slice is rotated by the angle between the two (measured in
+    the xy components) and rescaled to the 0-1 range.
+    ``uv`` is modified in place and also returned.
+    """
+    tangents = calculate_tangents(vertex_positions, vertex_normals, triangle_idxs, uv)
+    pos_stack = torch.stack(
+        [
+            -vertex_positions[..., 1],
+            vertex_positions[..., 0],
+            torch.zeros_like(vertex_positions[..., 0]),
+        ],
+        dim=-1,
+    )
+    # `dim` must be passed by keyword: the second positional argument of
+    # F.normalize is the norm order `p`, so a bare -1 would request a "-1 norm"
+    expected_tangents = F.normalize(
+        torch.linalg.cross(
+            vertex_normals, torch.linalg.cross(pos_stack, vertex_normals)
+        ),
+        dim=-1,
+    )
+    actual_tangents = tangents[triangle_idxs]
+    expected_tangents = expected_tangents[triangle_idxs]
+    def rotation_matrix_2d(theta):
+        c, s = torch.cos(theta), torch.sin(theta)
+        # Stack instead of torch.tensor(...) to stay on theta's device and dtype
+        return torch.stack([torch.stack([c, -s]), torch.stack([s, c])])
+    # Now find the rotation
+    index_mod = index % 6  # Shouldn't happen. Just for safety
+    for i in range(6):
+        mask = index_mod == i
+        if not mask.any():
+            continue
+        actual_mean_tangent = actual_tangents[mask].mean(dim=(0, 1))
+        expected_mean_tangent = expected_tangents[mask].mean(dim=(0, 1))
+        dot_product = torch.dot(actual_mean_tangent, expected_mean_tangent)
+        # z component of the cross product, i.e. the signed sine in the xy plane
+        cross_product = (
+            actual_mean_tangent[0] * expected_mean_tangent[1]
+            - actual_mean_tangent[1] * expected_mean_tangent[0]
+        )
+        angle = torch.atan2(cross_product, dot_product)
+        rot_matrix = rotation_matrix_2d(angle).to(device=mask.device, dtype=uv.dtype)
+        # Center the uv coordinate to be in the range of -1 to 1 and 0 centered,
+        # then rotate it
+        rotated = torch.einsum("ij,nfj->nfi", rot_matrix, uv[mask] * 2 - 1)
+        # Rescale the slice to be within the 0-1 range. The span is clamped so a
+        # slice whose UVs all coincide maps to 0 instead of producing NaN
+        lowest = rotated.min()
+        span = (rotated.max() - lowest).clamp(min=torch.finfo(rotated.dtype).eps)
+        uv[mask] = (rotated - lowest) / span
+    return uv
+def _handle_slice_uvs(
+    uv: Float[Tensor, "Nf 3 2"],
+    index: Integer[Tensor, "Nf"],  # noqa: F821
+    island_padding: float,
+    max_index: int = 6 * 2,
+) -> Float[Tensor, "Nf 3 2"]:  # noqa: F821
+    """Stretch the first-overlap slices and pad all UVs by ``island_padding``.
+    Slices with ids in [6, max_index) are shifted to start at 0 and divided by
+    their extent, where the extent is clipped to at least 0.5 (so the stretch is
+    at most 2x). The stretch writes through views of ``uv``, i.e. the input is
+    modified in place; the padded result is returned as a new tensor.
+    """
+    u_chan, v_chan = uv.unbind(-1)
+    for slice_id in range(6, max_index):
+        selected = index == slice_id
+        if not selected.any():
+            continue
+        # Scale the slice but only up to a factor of 2. This keeps the texture
+        # resolution in line with the first slice (half space in UV)
+        for chan in (u_chan, v_chan):
+            chan[selected] = (chan[selected] - chan[selected].min()) / (
+                chan[selected].max() - chan[selected].min()
+            ).clip(0.5)
+    inner_scale = 1 - 2 * island_padding
+    padded = [
+        (chan * inner_scale + island_padding).clip(0, 1) for chan in (u_chan, v_chan)
+    ]
+    return torch.stack(padded, dim=-1)
+def _handle_remaining_uvs(
+    uv: Float[Tensor, "Nf 3 2"],
+    index: Integer[Tensor, "Nf"],  # noqa: F821
+    island_padding: float,
+) -> Float[Tensor, "Nf 3 2"]:
+    """Lay out all faces with id >= 12 on a regular grid, one cell per face.
+    Each remaining triangle is normalized on its own, padded, scaled to the
+    cell size and moved to its cell. ``uv`` is updated in place for those faces
+    and returned; it is returned untouched when no such face exists.
+    """
+    uc, vc = uv.unbind(-1)
+    # Get all remaining elements
+    remaining_filter = index >= 6 * 2
+    # 0-dim tensor; it is converted implicitly by the math.* calls below
+    squares_left = remaining_filter.sum()
+    if squares_left == 0:
+        return uv
+    uc = uc[remaining_filter]
+    vc = vc[remaining_filter]
+    # Our remaining triangles are distributed in a rectangle
+    # The rectangle takes 0.5 of the entire uv space in width and 1/3 in height
+    ratio = 0.5 * (1 / 3)  # = 1/6 of the atlas area
+    # Example for 744 remaining faces: sqrt(744/(0.5*(1/3)))
+    mult = math.sqrt(squares_left / ratio)
+    num_square_width = int(math.ceil(0.5 * mult))
+    num_square_height = int(math.ceil(squares_left / num_square_width))
+    width = 1 / num_square_width
+    height = 1 / num_square_height
+    # The idea is again to keep the texture resolution consistent with the first slice
+    # This only occupies half the region in the texture chart but the scaling on the squares
+    # assumes full coverage.
+    clip_val = min(width, height) * 1.5
+    # Now normalize the UVs with taking into account the maximum scaling
+    # (min / max are taken per triangle, over its three corners)
+    uc = (uc - uc.min(dim=1, keepdim=True).values) / (
+        uc.amax(dim=1, keepdim=True) - uc.amin(dim=1, keepdim=True)
+    ).clip(clip_val)
+    vc = (vc - vc.min(dim=1, keepdim=True).values) / (
+        vc.amax(dim=1, keepdim=True) - vc.amin(dim=1, keepdim=True)
+    ).clip(clip_val)
+    # Add a small padding
+    uc = (
+        uc * (1 - island_padding * num_square_width * 0.5)
+        + island_padding * num_square_width * 0.25
+    ).clip(0, 1)
+    vc = (
+        vc * (1 - island_padding * num_square_height * 0.5)
+        + island_padding * num_square_height * 0.25
+    ).clip(0, 1)
+    # Shrink every triangle to the size of one grid cell
+    uc = uc * width
+    vc = vc * height
+    # And calculate offsets for each element
+    idx = torch.arange(uc.shape[0], device=uc.device, dtype=torch.int32)
+    x_idx = idx % num_square_width
+    y_idx = idx // num_square_width
+    # And move each triangle to its own spot
+    uc = uc + x_idx[:, None] * width
+    vc = vc + y_idx[:, None] * height
+    # Final padding of the whole block
+    uc = (uc * (1 - 2 * island_padding * 0.5) + island_padding * 0.5).clip(0, 1)
+    vc = (vc * (1 - 2 * island_padding * 0.5) + island_padding * 0.5).clip(0, 1)
+    uv[remaining_filter] = torch.stack([uc, vc], dim=-1)
+    return uv
+def _distribute_individual_uvs_in_atlas(
+    face_uv: Float[Tensor, "Nf 3 2"],
+    assigned_faces: Integer[Tensor, "Nf"],  # noqa: F821
+    offset_x: Float[Tensor, "Nf"],  # noqa: F821
+    offset_y: Float[Tensor, "Nf"],  # noqa: F821
+    div_x: Float[Tensor, "Nf"],  # noqa: F821
+    div_y: Float[Tensor, "Nf"],  # noqa: F821
+    island_padding: float,
+):
+    """Place every face into its atlas cell and return the UVs flattened to (-1, 2)."""
+    # Slices first, then the scattered remaining overlap faces
+    slice_uv = _handle_slice_uvs(face_uv, assigned_faces, island_padding)
+    local_uv = _handle_remaining_uvs(slice_uv, assigned_faces, island_padding)
+    # Per-face divisor and offset, broadcast over the three corners
+    cell_div = torch.stack([div_x, div_y], dim=-1)[:, None, :]
+    cell_offset = torch.stack([offset_x, offset_y], dim=-1)[:, None, :]
+    return (local_uv / cell_div + cell_offset).view(-1, 2)
+def _get_unique_face_uv(
+    uv: Float[Tensor, "Nf 3 2"],
+) -> Tuple[Float[Tensor, "Utex 3"], Integer[Tensor, "Nf"]]:  # noqa: F821
+    """Deduplicate UV rows and return them with a three-column index table."""
+    # Rows are compared along dim 0; `inverse` maps every input row to its unique row
+    unique_rows, inverse = torch.unique(uv, dim=0, return_inverse=True)
+    # Three consecutive entries form one face
+    return unique_rows, inverse.view(-1, 3)
+def _align_mesh_with_main_axis(
+    vertex_positions: Float[Tensor, "Nv 3"], vertex_normals: Float[Tensor, "Nv 3"]
+) -> Tuple[Float[Tensor, "Nv 3"], Float[Tensor, "Nv 3"]]:
+    """Rotate positions and normals so the mesh's principal axes follow x/y/z.
+    The two leading PCA directions plus their cross product are each matched to
+    the canonical axis they point along most strongly; the resulting matrix is
+    applied to both positions and normals.
+    Raises ValueError when three distinct canonical axes cannot be found.
+    """
+    # Use pca to find the 2 main axis (third is derived by cross product)
+    # Set the random seed so it's repeatable
+    # NOTE(review): this reseeds the global torch RNG as a side effect
+    torch.manual_seed(0)
+    _, _, v = torch.pca_lowrank(vertex_positions, q=2)
+    main_axis, seconday_axis = v[:, 0], v[:, 1]
+    main_axis: Float[Tensor, "3"] = F.normalize(main_axis, eps=1e-6, dim=-1)
+    # Orthogonalize the second axis
+    seconday_axis: Float[Tensor, "3"] = F.normalize(
+        seconday_axis - dot(seconday_axis, main_axis) * main_axis, eps=1e-6, dim=-1
+    )
+    # Create perpendicular third axis
+    third_axis: Float[Tensor, "3"] = F.normalize(
+        torch.cross(main_axis, seconday_axis), dim=-1, eps=1e-6
+    )
+    # Check to which canonical axis each aligns
+    main_axis_max_idx = main_axis.abs().argmax().item()
+    seconday_axis_max_idx = seconday_axis.abs().argmax().item()
+    third_axis_max_idx = third_axis.abs().argmax().item()
+    # Now sort the axes based on the argmax so they align with the canonical axes
+    # If two axes have the same argmax move one of them
+    all_possible_axis = {0, 1, 2}
+    cur_index = 1
+    while len(set([main_axis_max_idx, seconday_axis_max_idx, third_axis_max_idx])) != 3:
+        # Find missing axis
+        missing_axis = all_possible_axis - set(
+            [main_axis_max_idx, seconday_axis_max_idx, third_axis_max_idx]
+        )
+        missing_axis = missing_axis.pop()
+        # Just assign it to third axis as it had the smallest contribution to the
+        # overall shape
+        if cur_index == 1:
+            third_axis_max_idx = missing_axis
+        elif cur_index == 2:
+            # Second attempt: reassign the secondary axis instead
+            seconday_axis_max_idx = missing_axis
+        else:
+            raise ValueError("Could not find 3 unique axis")
+        cur_index += 1
+    # The loop above only exits with three distinct indices, so this is a safety net
+    if len({main_axis_max_idx, seconday_axis_max_idx, third_axis_max_idx}) != 3:
+        raise ValueError("Could not find 3 unique axis")
+    axes = [None] * 3
+    axes[main_axis_max_idx] = main_axis
+    axes[seconday_axis_max_idx] = seconday_axis
+    axes[third_axis_max_idx] = third_axis
+    # Create rotation matrix from the individual axes
+    # (stacking as columns and transposing puts axes[k] into row k)
+    rot_mat = torch.stack(axes, dim=1).T
+    # Now rotate the vertex positions and vertex normals so the mesh aligns with the main axis
+    vertex_positions = torch.einsum("ij,nj->ni", rot_mat, vertex_positions)
+    vertex_normals = torch.einsum("ij,nj->ni", rot_mat, vertex_normals)
+    return vertex_positions, vertex_normals
+def box_projection_uv_unwrap(
+    vertex_positions: Float[Tensor, "Nv 3"],
+    vertex_normals: Float[Tensor, "Nv 3"],
+    triangle_idxs: Integer[Tensor, "Nf 3"],
+    island_padding: float,
+) -> Tuple[Float[Tensor, "Utex 3"], Integer[Tensor, "Nf"]]:  # noqa: F821
+    """Unwrap a mesh with a cube projection and pack the result into one atlas.
+    Steps: align the mesh with its main axes, project every triangle onto a
+    cube face, rotate the per-face UV slices, move overlapping faces to
+    separate atlas regions, place everything in the atlas and deduplicate the
+    UV coordinates. Returns the unique UVs and the per-face UV indices.
+    """
+    # 1. Axis alignment
+    aligned_pos, aligned_nrm = _align_mesh_with_main_axis(
+        vertex_positions, vertex_normals
+    )
+    lower_corner = aligned_pos.min(dim=0).values
+    upper_corner = aligned_pos.max(dim=0).values
+    bbox: Float[Tensor, "2 3"] = torch.stack([lower_corner, upper_corner], dim=0)
+    # 2. Cube-face assignment and projection
+    cube_uv, cube_face = _box_assign_vertex_to_cube_face(
+        aligned_pos, aligned_nrm, triangle_idxs, bbox
+    )
+    # 3. Rotate the UV islands so they align with the radial z tangent space
+    cube_uv = _rotate_uv_slices_consistent_space(
+        aligned_pos, aligned_nrm, triangle_idxs, cube_uv, cube_face
+    )
+    # 4. Atlas region per face; this is where overlaps are detected
+    atlas_id = _assign_faces_uv_to_atlas_index(
+        aligned_pos, triangle_idxs, cube_uv, cube_face
+    )
+    # 5. Offset and scale of every region, then the final placement
+    off_x, off_y, scale_x, scale_y = _find_slice_offset_and_scale(atlas_id)
+    atlas_uv = _distribute_individual_uvs_in_atlas(
+        cube_uv, atlas_id, off_x, off_y, scale_x, scale_y, island_padding
+    )
+    # 6. Unique per-triangle UV coordinates
+    return _get_unique_face_uv(atlas_uv)

sf3d/sf3d_system.py ADDED Viewed

	@@ -0,0 +1,482 @@

+import os
+from dataclasses import dataclass, field
+from typing import Any, List, Optional, Tuple
+import numpy as np
+import torch
+import torch.nn.functional as F
+import trimesh
+from einops import rearrange
+from huggingface_hub import hf_hub_download
+from jaxtyping import Float
+from omegaconf import OmegaConf
+from PIL import Image
+from safetensors.torch import load_model
+from torch import Tensor
+from sf3d.models.isosurface import MarchingTetrahedraHelper
+from sf3d.models.mesh import Mesh
+from sf3d.models.utils import (
+    BaseModule,
+    ImageProcessor,
+    convert_data,
+    dilate_fill,
+    dot,
+    find_class,
+    float32_to_uint8_np,
+    normalize,
+    scale_tensor,
+)
+from sf3d.utils import create_intrinsic_from_fov_deg, default_cond_c2w
+from .texture_baker import TextureBaker
+class SF3D(BaseModule):
+    # Configuration of the SF3D system; extends the base module configuration.
+    @dataclass
+    class Config(BaseModule.Config):
+        # Side length the conditioning image is resized to (see run_image)
+        cond_image_size: int
+        # Resolution passed to the marching tetrahedra helper; also selects the tets file
+        isosurface_resolution: int
+        # Value subtracted from the decoded density before surface extraction
+        isosurface_threshold: float = 10.0
+        # Half extent of the cubic bounding box registered in configure()
+        radius: float = 1.0
+        # Color blended in behind the masked conditioning image
+        background_color: list[float] = field(default_factory=lambda: [0.5, 0.5, 0.5])
+        # Defaults used to build the conditioning camera in run_image
+        default_fovy_deg: float = 40.0
+        default_distance: float = 1.6
+        # Each submodule is described by a class path (resolved with find_class)
+        # and the config dict handed to its constructor
+        camera_embedder_cls: str = ""
+        camera_embedder: dict = field(default_factory=dict)
+        image_tokenizer_cls: str = ""
+        image_tokenizer: dict = field(default_factory=dict)
+        tokenizer_cls: str = ""
+        tokenizer: dict = field(default_factory=dict)
+        backbone_cls: str = ""
+        backbone: dict = field(default_factory=dict)
+        post_processor_cls: str = ""
+        post_processor: dict = field(default_factory=dict)
+        decoder_cls: str = ""
+        decoder: dict = field(default_factory=dict)
+        image_estimator_cls: str = ""
+        image_estimator: dict = field(default_factory=dict)
+        global_estimator_cls: str = ""
+        global_estimator: dict = field(default_factory=dict)
+    cfg: Config
+    @classmethod
+    def from_pretrained(
+        cls, pretrained_model_name_or_path: str, config_name: str, weight_name: str
+    ):
+        """Build the model from a local directory or a Hugging Face Hub repo.
+        When ``pretrained_model_name_or_path`` is an existing directory the
+        config and weight files are read from it; otherwise it is used as the
+        repo id and both files are fetched with ``hf_hub_download``.
+        """
+        is_local_dir = os.path.isdir(pretrained_model_name_or_path)
+        def locate(filename: str) -> str:
+            # Resolve one file either on disk or through the hub
+            if is_local_dir:
+                return os.path.join(pretrained_model_name_or_path, filename)
+            return hf_hub_download(
+                repo_id=pretrained_model_name_or_path, filename=filename
+            )
+        config_path = locate(config_name)
+        weight_path = locate(weight_name)
+        cfg = OmegaConf.load(config_path)
+        OmegaConf.resolve(cfg)
+        model = cls(cfg)
+        load_model(model, weight_path)
+        return model
+    @property
+    def device(self):
+        """Device of the first parameter yielded by ``self.parameters()``."""
+        first_param = next(self.parameters())
+        return first_param.device
+    def configure(self):
+        """Instantiate all submodules and helpers described by ``self.cfg``."""
+        # Every submodule class is looked up by name and built from its config dict
+        self.image_tokenizer = find_class(self.cfg.image_tokenizer_cls)(
+            self.cfg.image_tokenizer
+        )
+        self.tokenizer = find_class(self.cfg.tokenizer_cls)(self.cfg.tokenizer)
+        self.camera_embedder = find_class(self.cfg.camera_embedder_cls)(
+            self.cfg.camera_embedder
+        )
+        self.backbone = find_class(self.cfg.backbone_cls)(self.cfg.backbone)
+        self.post_processor = find_class(self.cfg.post_processor_cls)(
+            self.cfg.post_processor
+        )
+        self.decoder = find_class(self.cfg.decoder_cls)(self.cfg.decoder)
+        self.image_estimator = find_class(self.cfg.image_estimator_cls)(
+            self.cfg.image_estimator
+        )
+        self.global_estimator = find_class(self.cfg.global_estimator_cls)(
+            self.cfg.global_estimator
+        )
+        # Axis-aligned cube [-radius, radius]^3, stored as a buffer (min row, max row)
+        self.bbox: Float[Tensor, "2 3"]
+        self.register_buffer(
+            "bbox",
+            torch.as_tensor(
+                [
+                    [-self.cfg.radius, -self.cfg.radius, -self.cfg.radius],
+                    [self.cfg.radius, self.cfg.radius, self.cfg.radius],
+                ],
+                dtype=torch.float32,
+            ),
+        )
+        # The tets file is looked up relative to this file: ../load/tets/<resolution>_tets.npz
+        self.isosurface_helper = MarchingTetrahedraHelper(
+            self.cfg.isosurface_resolution,
+            os.path.join(
+                os.path.dirname(__file__),
+                "..",
+                "load",
+                "tets",
+                f"{self.cfg.isosurface_resolution}_tets.npz",
+            ),
+        )
+        self.baker = TextureBaker()
+        self.image_processor = ImageProcessor()
+    def triplane_to_meshes(
+        self, triplanes: Float[Tensor, "B 3 Cp Hp Wp"]
+    ) -> list[Mesh]:
+        """Extract one mesh per triplane in the batch.
+        The isosurface grid is scaled into ``self.bbox``, the triplane is
+        sampled at the grid vertices, and the decoded density (minus the
+        configured threshold) and vertex offsets are passed to the isosurface
+        helper. The resulting vertex positions are scaled into ``self.bbox``.
+        """
+        helper = self.isosurface_helper
+        # The query grid does not depend on the batch element, so build it once
+        grid_vertices = scale_tensor(
+            helper.grid_vertices.to(triplanes.device),
+            helper.points_range,
+            self.bbox,
+        )
+        meshes = []
+        for triplane in triplanes:
+            values = self.query_triplane(grid_vertices, triplane)
+            decoded = self.decoder(values, include=["vertex_offset", "density"])
+            sdf = decoded["density"] - self.cfg.isosurface_threshold
+            # query_triplane adds a leading batch dimension for unbatched input
+            deform = decoded["vertex_offset"].squeeze(0)
+            mesh: Mesh = helper(sdf.view(-1, 1), deform.view(-1, 3))
+            mesh.v_pos = scale_tensor(mesh.v_pos, helper.points_range, self.bbox)
+            meshes.append(mesh)
+        return meshes
+    def query_triplane(
+        self,
+        positions: Float[Tensor, "*B N 3"],
+        triplanes: Float[Tensor, "*B 3 Cp Hp Wp"],
+    ) -> Float[Tensor, "*B N F"]:
+        """Sample the three feature planes bilinearly at the given 3D positions.
+        The features of the three planes are concatenated along the last axis.
+        Unbatched input gets a leading batch dimension that is NOT removed
+        again, so the result always carries a batch axis.
+        """
+        batched = positions.ndim == 3
+        if not batched:
+            # no batch dimension
+            triplanes = triplanes[None, ...]
+            positions = positions[None, ...]
+        assert triplanes.ndim == 5 and positions.ndim == 3
+        # Map positions from [-radius, radius] to the [-1, 1] range used by grid_sample
+        positions = scale_tensor(
+            positions, (-self.cfg.radius, self.cfg.radius), (-1, 1)
+        )
+        # One 2D coordinate pair per plane: (x, y), (x, z) and (y, z)
+        indices2D: Float[Tensor, "B 3 N 2"] = torch.stack(
+            (positions[..., [0, 1]], positions[..., [0, 2]], positions[..., [1, 2]]),
+            dim=-3,
+        ).to(triplanes.dtype)
+        # Planes are folded into the batch axis so one grid_sample call handles all of them
+        out: Float[Tensor, "B3 Cp 1 N"] = F.grid_sample(
+            rearrange(triplanes, "B Np Cp Hp Wp -> (B Np) Cp Hp Wp", Np=3).float(),
+            rearrange(indices2D, "B Np N Nd -> (B Np) () N Nd", Np=3).float(),
+            align_corners=True,
+            mode="bilinear",
+        )
+        out = rearrange(out, "(B Np) Cp () N -> B N (Np Cp)", Np=3)
+        return out
+    def get_scene_codes(self, batch) -> Tuple[Float[Tensor, "B 3 C H W"], Any]:
+        """Run the image-to-triplane part of the model.
+        Returns a 2-tuple ``(scene_codes, direct_codes)``: the detokenized
+        backbone output after and before the post processor.
+        ``batch`` is modified in place when its conditioning tensors lack a
+        view dimension.
+        """
+        # if batch[rgb_cond] is only one view, add a view dimension
+        if len(batch["rgb_cond"].shape) == 4:
+            for key in (
+                "rgb_cond",
+                "mask_cond",
+                "c2w_cond",
+                "intrinsic_cond",
+                "intrinsic_normed_cond",
+            ):
+                batch[key] = batch[key].unsqueeze(1)
+        batch_size, n_input_views = batch["rgb_cond"].shape[:2]
+        camera_embeds: Optional[Float[Tensor, "B Nv Cc"]]
+        camera_embeds = self.camera_embedder(**batch)
+        input_image_tokens: Float[Tensor, "B Nv Cit Nit"] = self.image_tokenizer(
+            rearrange(batch["rgb_cond"], "B Nv H W C -> B Nv C H W"),
+            modulation_cond=camera_embeds,
+        )
+        # Merge the tokens of all views into one sequence per batch element
+        input_image_tokens = rearrange(
+            input_image_tokens, "B Nv C Nt -> B (Nv Nt) C", Nv=n_input_views
+        )
+        tokens: Float[Tensor, "B Ct Nt"] = self.tokenizer(batch_size)
+        tokens = self.backbone(
+            tokens,
+            encoder_hidden_states=input_image_tokens,
+            modulation_cond=None,
+        )
+        direct_codes = self.tokenizer.detokenize(tokens)
+        scene_codes = self.post_processor(direct_codes)
+        return scene_codes, direct_codes
+    def run_image(
+        self,
+        image: Image.Image,
+        bake_resolution: int,
+        estimate_illumination: bool = False,
+    ) -> Tuple[trimesh.Trimesh, dict[str, Any]]:
+        """Generate a mesh from a single RGBA image.
+        The image is resized to the conditioning size, composited over the
+        configured background color using its alpha channel, paired with the
+        default conditioning camera and passed to ``generate_mesh``. Returns the
+        first generated mesh and the accompanying dictionary.
+        Raises ValueError when the image is not in RGBA mode.
+        """
+        if image.mode != "RGBA":
+            raise ValueError("Image must be in RGBA mode")
+        img_cond = (
+            torch.from_numpy(
+                np.asarray(
+                    image.resize((self.cfg.cond_image_size, self.cfg.cond_image_size))
+                ).astype(np.float32)
+                / 255.0
+            )
+            .float()
+            .clip(0, 1)
+            .to(self.device)
+        )
+        # The alpha channel is the foreground mask
+        mask_cond = img_cond[:, :, -1:]
+        # Blend: background where the mask is 0, image color where it is 1
+        rgb_cond = torch.lerp(
+            torch.tensor(self.cfg.background_color, device=self.device)[None, None, :],
+            img_cond[:, :, :3],
+            mask_cond,
+        )
+        c2w_cond = default_cond_c2w(self.cfg.default_distance).to(self.device)
+        intrinsic, intrinsic_normed_cond = create_intrinsic_from_fov_deg(
+            self.cfg.default_fovy_deg,
+            self.cfg.cond_image_size,
+            self.cfg.cond_image_size,
+        )
+        batch = {
+            "rgb_cond": rgb_cond,
+            "mask_cond": mask_cond,
+            "c2w_cond": c2w_cond.unsqueeze(0),
+            "intrinsic_cond": intrinsic.to(self.device).unsqueeze(0),
+            "intrinsic_normed_cond": intrinsic_normed_cond.to(self.device).unsqueeze(0),
+        }
+        meshes, global_dict = self.generate_mesh(
+            batch, bake_resolution, estimate_illumination
+        )
+        return meshes[0], global_dict
+    def generate_mesh(
+        self,
+        batch,
+        bake_resolution: int,
+        estimate_illumination: bool = False,
+    ) -> Tuple[List[trimesh.Trimesh], dict[str, Any]]:
+        """Turn a conditioning batch into textured meshes.
+
+        ``batch["rgb_cond"]`` and ``batch["mask_cond"]`` are replaced in place
+        by their ``self.image_processor`` output before the scene codes are
+        computed.
+
+        Args:
+            batch: dict of conditioning tensors (``run_image`` shows the keys
+                it builds).
+            bake_resolution: side length of the square textures that are baked.
+            estimate_illumination: when True and ``self.global_estimator`` is
+                set, its outputs are added to the returned dict.
+
+        Returns:
+            ``(meshes, global_dict)``: one ``trimesh.Trimesh`` per mesh from
+            ``self.triplane_to_meshes`` (an empty ``Trimesh`` for a mesh
+            without vertices) and the dict of estimator outputs.
+        """
+        batch["rgb_cond"] = self.image_processor(
+            batch["rgb_cond"], self.cfg.cond_image_size
+        )
+        batch["mask_cond"] = self.image_processor(
+            batch["mask_cond"], self.cfg.cond_image_size
+        )
+        scene_codes, non_postprocessed_codes = self.get_scene_codes(batch)
+        # Collect the outputs of the optional estimators
+        global_dict = {}
+        if self.image_estimator is not None:
+            global_dict.update(
+                self.image_estimator(batch["rgb_cond"] * batch["mask_cond"])
+            )
+        if self.global_estimator is not None and estimate_illumination:
+            global_dict.update(self.global_estimator(non_postprocessed_codes))
+        # Mesh extraction and baking run without gradients and with CUDA autocast disabled
+        with torch.no_grad():
+            with torch.autocast(device_type="cuda", enabled=False):
+                meshes = self.triplane_to_meshes(scene_codes)
+                rets = []
+                for i, mesh in enumerate(meshes):
+                    # Check for empty mesh
+                    if mesh.v_pos.shape[0] == 0:
+                        rets.append(trimesh.Trimesh())
+                        continue
+                    mesh.unwrap_uv()
+                    # Build textures
+                    rast = self.baker.rasterize(
+                        mesh.v_tex, mesh.t_pos_idx, bake_resolution
+                    )
+                    bake_mask = self.baker.get_mask(rast)
+                    # Interpolate the vertex positions into UV space and keep the covered texels
+                    pos_bake = self.baker.interpolate(
+                        mesh.v_pos,
+                        rast,
+                        mesh.t_pos_idx,
+                        mesh.v_tex,
+                    )
+                    gb_pos = pos_bake[bake_mask]
+                    # Query this mesh's scene code at the covered texel positions and decode it
+                    tri_query = self.query_triplane(gb_pos, scene_codes[i])[0]
+                    decoded = self.decoder(
+                        tri_query, exclude=["density", "vertex_offset"]
+                    )
+                    # Interpolated and re-normalized vertex normals for the covered texels
+                    nrm = self.baker.interpolate(
+                        mesh.v_nrm,
+                        rast,
+                        mesh.t_pos_idx,
+                        mesh.v_tex,
+                    )
+                    gb_nrm = F.normalize(nrm[bake_mask], dim=-1)
+                    decoded["normal"] = gb_nrm
+                    # Copy every global_dict entry whose key starts with "decoder_"
+                    # into decoded, with that prefix removed
+                    for k, v in global_dict.items():
+                        if k.startswith("decoder_"):
+                            decoded[k.replace("decoder_", "")] = v[i]
+                    mat_out = {
+                        "albedo": decoded["features"],
+                        "roughness": decoded["roughness"],
+                        "metallic": decoded["metallic"],
+                        "normal": normalize(decoded["perturb_normal"]),
+                        "bump": None,
+                    }
+                    # Scatter the per-texel values into full-resolution texture images
+                    for k, v in mat_out.items():
+                        if v is None:
+                            continue
+                        if v.shape[0] == 1:
+                            # Skip and directly add a single value
+                            mat_out[k] = v[0]
+                        else:
+                            f = torch.zeros(
+                                bake_resolution,
+                                bake_resolution,
+                                v.shape[-1],
+                                dtype=v.dtype,
+                                device=v.device,
+                            )
+                            # Already a full texture image: leave it untouched
+                            if v.shape == f.shape:
+                                continue
+                            if k == "normal":
+                                # Use un-normalized tangents here so that larger/smaller tris
+                                # don't affect the tangents that much
+                                tng = self.baker.interpolate(
+                                    mesh.v_tng,
+                                    rast,
+                                    mesh.t_pos_idx,
+                                    mesh.v_tex,
+                                )
+                                gb_tng = tng[bake_mask]
+                                gb_tng = F.normalize(gb_tng, dim=-1)
+                                gb_btng = F.normalize(
+                                    torch.cross(gb_tng, gb_nrm, dim=-1), dim=-1
+                                )
+                                normal = F.normalize(mat_out["normal"], dim=-1)
+                                # Express the normal in the (tangent, bitangent, normal) frame
+                                bump = torch.cat(
+                                    # Check if we have to flip some things
+                                    (
+                                        dot(normal, gb_tng),
+                                        dot(normal, gb_btng),
+                                        dot(normal, gb_nrm).clip(
+                                            0.3, 1
+                                        ),  # Never go below 0.3. This would indicate a flipped (or close to one) normal
+                                    ),
+                                    -1,
+                                )
+                                # Remap from [-1, 1] to [0, 1]; the result is stored under "bump"
+                                bump = (bump * 0.5 + 0.5).clamp(0, 1)
+                                f[bake_mask] = bump.view(-1, 3)
+                                mat_out["bump"] = f
+                            else:
+                                f[bake_mask] = v.view(-1, v.shape[-1])
+                                mat_out[k] = f
+                    # Runs dilate_fill on a texture image with the bake mask;
+                    # 1-D inputs (single values) are returned unchanged
+                    def uv_padding(arr):
+                        if arr.ndim == 1:
+                            return arr
+                        return (
+                            dilate_fill(
+                                arr.permute(2, 0, 1)[None, ...],
+                                bake_mask.unsqueeze(0).unsqueeze(0),
+                                iterations=bake_resolution // 150,
+                            )
+                            .squeeze(0)
+                            .permute(1, 2, 0)
+                        )
+                    verts_np = convert_data(mesh.v_pos)
+                    faces = convert_data(mesh.t_pos_idx)
+                    uvs = convert_data(mesh.v_tex)
+                    basecolor_tex = Image.fromarray(
+                        float32_to_uint8_np(convert_data(uv_padding(mat_out["albedo"])))
+                    ).convert("RGB")
+                    basecolor_tex.format = "JPEG"
+                    # .item() requires metallic and roughness to each hold exactly one value here
+                    metallic = mat_out["metallic"].squeeze().cpu().item()
+                    roughness = mat_out["roughness"].squeeze().cpu().item()
+                    if "bump" in mat_out and mat_out["bump"] is not None:
+                        bump_np = convert_data(uv_padding(mat_out["bump"]))
+                        # Reference value (0.5, 0.5, 1) used below to find perfectly flat texels
+                        bump_up = np.ones_like(bump_np)
+                        bump_up[..., :2] = 0.5
+                        bump_up[..., 2:] = 1
+                        bump_tex = Image.fromarray(
+                            float32_to_uint8_np(
+                                bump_np,
+                                dither=True,
+                                # Do not dither if something is perfectly flat
+                                dither_mask=np.all(
+                                    bump_np == bump_up, axis=-1, keepdims=True
+                                ).astype(np.float32),
+                            )
+                        ).convert("RGB")
+                        bump_tex.format = (
+                            "JPEG"  # PNG would be better but the assets are larger
+                        )
+                    else:
+                        bump_tex = None
+                    material = trimesh.visual.material.PBRMaterial(
+                        baseColorTexture=basecolor_tex,
+                        roughnessFactor=roughness,
+                        metallicFactor=metallic,
+                        normalTexture=bump_tex,
+                    )
+                    tmesh = trimesh.Trimesh(
+                        vertices=verts_np,
+                        faces=faces,
+                        visual=trimesh.visual.texture.TextureVisuals(
+                            uv=uvs, material=material
+                        ),
+                    )
+                    # Rotate -90 degrees about X, then +90 degrees about Y, then invert the mesh
+                    rot = trimesh.transformations.rotation_matrix(
+                        np.radians(-90), [1, 0, 0]
+                    )
+                    tmesh.apply_transform(rot)
+                    tmesh.apply_transform(
+                        trimesh.transformations.rotation_matrix(
+                            np.radians(90), [0, 1, 0]
+                        )
+                    )
+                    tmesh.invert()
+                    rets.append(tmesh)
+        return rets, global_dict

sf3d/sf3d_texture_baker.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import os
+import slangtorch
+import torch
+import torch.nn as nn
+from jaxtyping import Bool, Float
+from torch import Tensor
+class TextureBaker(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.baker = slangtorch.loadModule(
+            os.path.join(os.path.dirname(__file__), "texture_baker.slang")
+        )
+    def rasterize(
+        self,
+        uv: Float[Tensor, "Nv 2"],
+        face_indices: Float[Tensor, "Nf 3"],
+        bake_resolution: int,
+    ) -> Float[Tensor, "bake_resolution bake_resolution 4"]:
+        if not face_indices.is_cuda or not uv.is_cuda:
+            raise ValueError("All input tensors must be on cuda")
+        face_indices = face_indices.to(torch.int32)
+        uv = uv.to(torch.float32)
+        rast_result = torch.empty(
+            bake_resolution, bake_resolution, 4, device=uv.device, dtype=torch.float32
+        )
+        block_size = 16
+        grid_size = bake_resolution // block_size
+        self.baker.bake_uv(uv=uv, indices=face_indices, output=rast_result).launchRaw(
+            blockSize=(block_size, block_size, 1), gridSize=(grid_size, grid_size, 1)
+        )
+        return rast_result
+    def get_mask(
+        self, rast: Float[Tensor, "bake_resolution bake_resolution 4"]
+    ) -> Bool[Tensor, "bake_resolution bake_resolution"]:
+        return rast[..., -1] >= 0
+    def interpolate(
+        self,
+        attr: Float[Tensor, "Nv 3"],
+        rast: Float[Tensor, "bake_resolution bake_resolution 4"],
+        face_indices: Float[Tensor, "Nf 3"],
+        uv: Float[Tensor, "Nv 2"],
+    ) -> Float[Tensor, "bake_resolution bake_resolution 3"]:
+        # Make sure all input tensors are on torch
+        if not attr.is_cuda or not face_indices.is_cuda or not rast.is_cuda:
+            raise ValueError("All input tensors must be on cuda")
+        attr = attr.to(torch.float32)
+        face_indices = face_indices.to(torch.int32)
+        uv = uv.to(torch.float32)
+        pos_bake = torch.zeros(
+            rast.shape[0],
+            rast.shape[1],
+            3,
+            device=attr.device,
+            dtype=attr.dtype,
+        )
+        block_size = 16
+        grid_size = rast.shape[0] // block_size
+        self.baker.interpolate(
+            attr=attr, indices=face_indices, rast=rast, output=pos_bake
+        ).launchRaw(
+            blockSize=(block_size, block_size, 1), gridSize=(grid_size, grid_size, 1)
+        )
+        return pos_bake
+    def forward(
+        self,
+        attr: Float[Tensor, "Nv 3"],
+        uv: Float[Tensor, "Nv 2"],
+        face_indices: Float[Tensor, "Nf 3"],
+        bake_resolution: int,
+    ) -> Float[Tensor, "bake_resolution bake_resolution 3"]:
+        rast = self.rasterize(uv, face_indices, bake_resolution)
+        return self.interpolate(attr, rast, face_indices, uv)

sf3d/sf3d_texture_baker.slang ADDED Viewed

	@@ -0,0 +1,93 @@

+// Computes the barycentric coordinates of a 2D point with respect to a triangle.
+//
+// xy: 2D test position
+// v1: vertex position 1
+// v2: vertex position 2
+// v3: vertex position 3
+// u, v, w: resulting weights of v1, v2 and v3 (u + v + w == 1)
+//
+// Returns true if xy lies inside the triangle (edges included).
+// For a degenerate (zero-area) triangle the weights are set to zero and false is returned.
+bool barycentric_coordinates(float2 xy, float2 v1, float2 v2, float2 v3, out float u, out float v, out float w)
+{
+    float2 v1v2 = v2 - v1;
+    float2 v1v3 = v3 - v1;
+    float2 xyv1 = xy - v1;
+    float d00 = dot(v1v2, v1v2);
+    float d01 = dot(v1v2, v1v3);
+    float d11 = dot(v1v3, v1v3);
+    float d20 = dot(xyv1, v1v2);
+    float d21 = dot(xyv1, v1v3);
+    float denom = d00 * d11 - d01 * d01;
+    // A zero determinant means the two edges are collinear: report a miss
+    // explicitly instead of dividing by zero.
+    if (denom == 0.0) {
+        u = 0.0;
+        v = 0.0;
+        w = 0.0;
+        return false;
+    }
+    v = (d11 * d20 - d01 * d21) / denom;
+    w = (d00 * d21 - d01 * d20) / denom;
+    u = 1.0 - v - w;
+    return (v >= 0.0) && (w >= 0.0) && (v + w <= 1.0);
+}
+// Interpolates a per-vertex attribute into output, one thread per texel,
+// using the rasterization result (barycentric coordinates + triangle index).
+[AutoPyBindCUDA]
+[CUDAKernel]
+void interpolate(
+    TensorView<float3> attr,
+    TensorView<int3> indices,
+    TensorView<float4> rast,
+    TensorView<float3> output)
+{
+    uint3 dispatch_id = cudaBlockIdx() * cudaBlockDim() + cudaThreadIdx();
+    // Valid indices are 0 .. size - 1, so an index equal to the size is already out of range
+    if (dispatch_id.x >= output.size(0) || dispatch_id.y >= output.size(1))
+        return;
+    float4 barycentric = rast[dispatch_id.x, dispatch_id.y];
+    // The w channel carries the triangle index; a negative value marks an empty texel
+    int triangle_idx = int(barycentric.w);
+    if (triangle_idx < 0) {
+        output[dispatch_id.x, dispatch_id.y] = float3(0.0, 0.0, 0.0);
+        return;
+    }
+    float3 v1 = attr[indices[triangle_idx].x];
+    float3 v2 = attr[indices[triangle_idx].y];
+    float3 v3 = attr[indices[triangle_idx].z];
+    output[dispatch_id.x, dispatch_id.y] = v1 * barycentric.x + v2 * barycentric.y + v3 * barycentric.z;
+}
+// Rasterizes the UV triangles, one thread per texel: writes the barycentric
+// coordinates and index of the first triangle containing the texel, or
+// (0, 0, 0, -1) when no triangle contains it.
+[AutoPyBindCUDA]
+[CUDAKernel]
+void bake_uv(
+    TensorView<float2> uv,
+    TensorView<int3> indices,
+    TensorView<float4> output)
+{
+    uint3 dispatch_id = cudaBlockIdx() * cudaBlockDim() + cudaThreadIdx();
+    // output is written as output[x, y], so x is bounded by size(0) and y by size(1);
+    // valid indices are 0 .. size - 1.
+    if (dispatch_id.x >= output.size(0) || dispatch_id.y >= output.size(1))
+        return;
+    // We index x,y but the original coords are HW. So swap them
+    float2 pixel_coord = float2(dispatch_id.y, dispatch_id.x);
+    // Normalize to [0, 1]
+    pixel_coord /= float2(output.size(1), output.size(0));
+    pixel_coord = clamp(pixel_coord, 0.0, 1.0);
+    // Flip the second (vertical) component
+    pixel_coord.y = 1 - pixel_coord.y;
+    // Linear scan over all triangles; the first one containing the texel wins
+    for (int i = 0; i < indices.size(0); i++) {
+        float2 v1 = float2(uv[indices[i].x].x, uv[indices[i].x].y);
+        float2 v2 = float2(uv[indices[i].y].x, uv[indices[i].y].y);
+        float2 v3 = float2(uv[indices[i].z].x, uv[indices[i].z].y);
+        float u, v, w;
+        bool hit = barycentric_coordinates(pixel_coord, v1, v2, v3, u, v, w);
+        if (hit){
+            output[dispatch_id.x, dispatch_id.y] = float4(u, v, w, i);
+            return;
+        }
+    }
+    output[dispatch_id.x, dispatch_id.y] = float4(0.0, 0.0, 0.0, -1);
+}

sf3d/sf3d_utils.py ADDED Viewed

	@@ -0,0 +1,91 @@

+from typing import Any
+import numpy as np
+import rembg
+import torch
+from PIL import Image
+import sf3d.models.utils as sf3d_utils
+def create_intrinsic_from_fov_deg(fov_deg: float, cond_height: int, cond_width: int):
+    intrinsic = sf3d_utils.get_intrinsic_from_fov(
+        np.deg2rad(fov_deg),
+        H=cond_height,
+        W=cond_width,
+    )
+    intrinsic_normed_cond = intrinsic.clone()
+    intrinsic_normed_cond[..., 0, 2] /= cond_width
+    intrinsic_normed_cond[..., 1, 2] /= cond_height
+    intrinsic_normed_cond[..., 0, 0] /= cond_width
+    intrinsic_normed_cond[..., 1, 1] /= cond_height
+    return intrinsic, intrinsic_normed_cond
+def default_cond_c2w(distance: float):
+    c2w_cond = torch.as_tensor(
+        [
+            [0, 0, 1, distance],
+            [1, 0, 0, 0],
+            [0, 1, 0, 0],
+            [0, 0, 0, 1],
+        ]
+    ).float()
+    return c2w_cond
+def remove_background(
+    image: Image,
+    rembg_session: Any = None,
+    force: bool = False,
+    **rembg_kwargs,
+) -> Image:
+    do_remove = True
+    if image.mode == "RGBA" and image.getextrema()[3][0] < 255:
+        do_remove = False
+    do_remove = do_remove or force
+    if do_remove:
+        image = rembg.remove(image, session=rembg_session, **rembg_kwargs)
+    return image
+def resize_foreground(
+    image: Image,
+    ratio: float,
+) -> Image:
+    image = np.array(image)
+    assert image.shape[-1] == 4
+    alpha = np.where(image[..., 3] > 0)
+    y1, y2, x1, x2 = (
+        alpha[0].min(),
+        alpha[0].max(),
+        alpha[1].min(),
+        alpha[1].max(),
+    )
+    # crop the foreground
+    fg = image[y1:y2, x1:x2]
+    # pad to square
+    size = max(fg.shape[0], fg.shape[1])
+    ph0, pw0 = (size - fg.shape[0]) // 2, (size - fg.shape[1]) // 2
+    ph1, pw1 = size - fg.shape[0] - ph0, size - fg.shape[1] - pw0
+    new_image = np.pad(
+        fg,
+        ((ph0, ph1), (pw0, pw1), (0, 0)),
+        mode="constant",
+        constant_values=((0, 0), (0, 0), (0, 0)),
+    )
+    # compute padding according to the ratio
+    new_size = int(new_image.shape[0] / ratio)
+    # pad to size, double side
+    ph0, pw0 = (new_size - size) // 2, (new_size - size) // 2
+    ph1, pw1 = new_size - size - ph0, new_size - size - pw0
+    new_image = np.pad(
+        new_image,
+        ((ph0, ph1), (pw0, pw1), (0, 0)),
+        mode="constant",
+        constant_values=((0, 0), (0, 0), (0, 0)),
+    )
+    new_image = Image.fromarray(new_image, mode="RGBA")
+    return new_image