YulianSa committed
Commit 914c133 · 1 Parent(s): e6f7ceb
Files changed (3):
  1. app.py +67 -20
  2. infer_refine.py +67 -66
  3. pre-requirements.txt +1 -0
app.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
import glob
import torch
import random
+import imagehash
from tempfile import NamedTemporaryFile
from PIL import Image
import os
@@ -74,29 +75,73 @@ If you find our work useful for your research or applications, please cite using
If you have any questions, feel free to open a discussion or contact us at <b>hyz22@mails.tsinghua.edu.cn</b>.
"""

+cache_arbitrary = {}
+cache_multiview = [ {}, {}, {} ]
+cache_slrm = {}
+cache_refine = {}
+
+tmp_path = '/tmp'
+
# Example placeholder function - replace with the actual model
def arbitrary_to_apose(image, seed):
    # convert image to PIL.Image
    image = Image.fromarray(image)
-    return infer_api.genStage1(image, seed)
+    image_hash = str(imagehash.average_hash(image)) + '_' + str(seed)
+    if image_hash not in cache_arbitrary:
+        apose_img = infer_api.genStage1(image, seed)
+        apose_img.save(f'{tmp_path}/{image_hash}.png')
+        cache_arbitrary[image_hash] = f'{tmp_path}/{image_hash}.png'
+        print(f'cached apose image: {image_hash}')
+        return apose_img
+    else:
+        apose_img = Image.open(cache_arbitrary[image_hash])
+        print(f'loaded cached apose image: {image_hash}')
+        return apose_img

def apose_to_multiview(apose_img, seed):
    # convert image to PIL.Image
    apose_img = Image.fromarray(apose_img)
-    results = infer_api.genStage2(apose_img, seed, num_levels=1)
-    infer_api.add_results(results)
-    return results[0]["images"]
-
-def multiview_to_mesh(images):
-    mesh_files = infer_api.genStage3(images)
-    return mesh_files
-
-def refine_mesh(apose_img, mesh1, mesh2, mesh3, seed):
+    image_hash = str(imagehash.average_hash(apose_img)) + '_' + str(seed)
+    if image_hash not in cache_multiview[0]:
+        results = infer_api.genStage2(apose_img, seed, num_levels=1)
+        for idx, img in enumerate(results[0]["images"]):
+            img.save(f'{tmp_path}/{image_hash}_images_{idx}.png')
+        for idx, img in enumerate(results[0]["normals"]):
+            img.save(f'{tmp_path}/{image_hash}_normals_{idx}.png')
+        cache_multiview[0][image_hash] = {
+            "images": [f'{tmp_path}/{image_hash}_images_{idx}.png' for idx in range(len(results[0]["images"]))],
+            "normals": [f'{tmp_path}/{image_hash}_normals_{idx}.png' for idx in range(len(results[0]["normals"]))]
+        }
+        print(f'cached multiview images: {image_hash}')
+        return results[0]["images"], image_hash
+    else:
+        print(f'loaded cached multiview images: {image_hash}')
+        return [Image.open(img_path) for img_path in cache_multiview[0][image_hash]["images"]], image_hash
+
+def multiview_to_mesh(images, image_hash):
+    if image_hash not in cache_slrm:
+        mesh_files = infer_api.genStage3(images)
+        cache_slrm[image_hash] = mesh_files
+        print(f'cached slrm files: {image_hash}')
+    else:
+        mesh_files = cache_slrm[image_hash]
+        print(f'loaded cached slrm files: {image_hash}')
+    return *mesh_files, image_hash
+
+def refine_mesh(apose_img, mesh1, mesh2, mesh3, seed, image_hash):
    apose_img = Image.fromarray(apose_img)
-    results = infer_api.genStage2(apose_img, seed, num_levels=2)
-    infer_api.add_results(results)
-    print(infer_api.results.keys())
-    refined = infer_api.genStage4([mesh1, mesh2, mesh3], infer_api.results)
+    if image_hash not in cache_refine:
+        results = infer_api.genStage2(apose_img, seed, num_levels=2)
+        results[0] = {}
+        results[0]["images"] = [Image.open(img_path) for img_path in cache_multiview[0][image_hash]["images"]]
+        results[0]["normals"] = [Image.open(img_path) for img_path in cache_multiview[0][image_hash]["normals"]]
+        refined = infer_api.genStage4([mesh1, mesh2, mesh3], results)
+        cache_refine[image_hash] = refined
+        print(f'cached refined mesh: {image_hash}')
+    else:
+        refined = cache_refine[image_hash]
+        print(f'loaded cached refined mesh: {image_hash}')
+
    return refined

with gr.Blocks(title="StdGEN: Semantically Decomposed 3D Character Generation from Single Images") as demo:
@@ -112,7 +157,7 @@ with gr.Blocks(title="StdGEN: Semantically Decomposed 3D Character Generation fr
        )
        seed_input = gr.Number(
            label="Seed",
-            value=50,
+            value=52,
            precision=0,
            interactive=True
        )
@@ -131,6 +176,7 @@ with gr.Blocks(title="StdGEN: Semantically Decomposed 3D Character Generation fr
            precision=0,
            interactive=True
        )
+        state2 = gr.State(value="")
        view_btn = gr.Button("Generate Multi-view Images")

        with gr.Column():
@@ -141,6 +187,7 @@ with gr.Blocks(title="StdGEN: Semantically Decomposed 3D Character Generation fr
            interactive=False,
            height="None"
        )
+        state3 = gr.State(value="")
        mesh_btn = gr.Button("Reconstruct")

    with gr.Row():
@@ -165,20 +212,20 @@ with gr.Blocks(title="StdGEN: Semantically Decomposed 3D Character Generation fr
    view_btn.click(
        apose_to_multiview,
        inputs=[a_pose_image, seed_input2],
-        outputs=multiview_gallery
+        outputs=[multiview_gallery, state2]
    )

    mesh_btn.click(
        multiview_to_mesh,
-        inputs=multiview_gallery,
-        outputs=[*mesh_cols, full_mesh]
+        inputs=[multiview_gallery, state2],
+        outputs=[*mesh_cols, full_mesh, state3]
    )

    refine_btn.click(
        refine_mesh,
-        inputs=[a_pose_image, *mesh_cols, seed_input2],
+        inputs=[a_pose_image, *mesh_cols, seed_input2, state3],
        outputs=[refined_meshes[2], refined_meshes[0], refined_meshes[1], refined_full_mesh]
    )

if __name__ == "__main__":
-    demo.launch()
+    demo.launch(server_name="0.0.0.0", share=True, server_port=24527)
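Note on the caching this commit introduces: each stage in app.py now keys its output on a perceptual hash of the input image plus the seed, so a re-run with a visually identical input loads the saved PNG instead of calling the model. A minimal self-contained sketch of the pattern, where expensive_generate is a hypothetical stand-in for a call like infer_api.genStage1:

import imagehash
from PIL import Image

cache = {}          # hash key -> path of the cached PNG
tmp_path = '/tmp'

def expensive_generate(image: Image.Image, seed: int) -> Image.Image:
    # Hypothetical stand-in for a slow model call such as infer_api.genStage1.
    return image.rotate(seed % 360)

def cached_generate(image: Image.Image, seed: int) -> Image.Image:
    # average_hash is a short perceptual fingerprint, so visually identical
    # images with the same seed collapse to the same cache key.
    key = f'{imagehash.average_hash(image)}_{seed}'
    if key not in cache:
        result = expensive_generate(image, seed)
        result.save(f'{tmp_path}/{key}.png')
        cache[key] = f'{tmp_path}/{key}.png'
        return result
    return Image.open(cache[key])

Because average_hash tolerates small pixel differences, near-duplicate uploads also hit the cache; that is fine for a demo, but worth knowing if exact reproducibility matters.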
infer_refine.py CHANGED
@@ -16,16 +16,16 @@ from sklearn.neighbors import KDTree

from segment_anything import SamAutomaticMaskGenerator, sam_model_registry

-sam = sam_model_registry["vit_h"](checkpoint="./ckpt/sam_vit_h_4b8939.pth").cuda()
-generator = SamAutomaticMaskGenerator(
-    model=sam,
-    points_per_side=64,
-    pred_iou_thresh=0.80,
-    stability_score_thresh=0.92,
-    crop_n_layers=1,
-    crop_n_points_downscale_factor=2,
-    min_mask_region_area=100,
-)
+# sam = sam_model_registry["vit_h"](checkpoint="./ckpt/sam_vit_h_4b8939.pth").cuda()
+# generator = SamAutomaticMaskGenerator(
+#     model=sam,
+#     points_per_side=64,
+#     pred_iou_thresh=0.80,
+#     stability_score_thresh=0.92,
+#     crop_n_layers=1,
+#     crop_n_points_downscale_factor=2,
+#     min_mask_region_area=100,
+# )


def fix_vert_color_glb(mesh_path):
@@ -49,28 +49,59 @@ def srgb_to_linear(c_srgb):
    return c_linear.clip(0, 1.)


+import trimesh
+import numpy as np
+from PIL import Image
+from pytorch3d.structures import Meshes
+from pytorch3d.renderer import TexturesUV
+
def save_py3dmesh_with_trimesh_fast(meshes: Meshes, save_glb_path, apply_sRGB_to_LinearRGB=True):
-    # convert from pytorch3d meshes to trimesh mesh
+    # Convert from pytorch3d meshes to trimesh mesh
    vertices = meshes.verts_packed().cpu().float().numpy()
    triangles = meshes.faces_packed().cpu().long().numpy()
-    np_color = meshes.textures.verts_features_packed().cpu().float().numpy()
+
+    # Check if the mesh uses TexturesUV
+    if isinstance(meshes.textures, TexturesUV):
+        # Extract UV coordinates and texture map
+        verts_uvs = meshes.textures.verts_uvs_padded()[0].cpu().numpy()  # UV coordinates (N, 2)
+        faces_uvs = meshes.textures.faces_uvs_padded()[0].cpu().numpy()  # UV face indices (M, 3); unused, trimesh expects per-vertex UVs
+        texture_map = meshes.textures.maps_padded()[0].cpu().numpy()  # Texture map (H, W, 3 or 4)
+
+        # Convert the texture map to a trimesh-compatible PIL image
+        if apply_sRGB_to_LinearRGB:
+            texture_map = srgb_to_linear(texture_map)
+        texture_map = np.clip(texture_map, 0, 1)  # Ensure values are in [0, 1]
+        texture_image = Image.fromarray((texture_map * 255).astype(np.uint8))
+        material = trimesh.visual.texture.SimpleMaterial(image=texture_image, diffuse=(255, 255, 255))
+
+        # Create a trimesh.Trimesh object with UVs and texture
+        mesh = trimesh.Trimesh(
+            vertices=vertices,
+            faces=triangles,
+            visual=trimesh.visual.TextureVisuals(
+                uv=verts_uvs,  # UV coordinates
+                image=texture_image,  # Texture map
+                material=material  # Material with texture
+            )
+        )
+    else:
+        # Fall back to vertex colors if TexturesUV is not used
+        np_color = meshes.textures.verts_features_packed().cpu().float().numpy()
+        if apply_sRGB_to_LinearRGB:
+            np_color = srgb_to_linear(np_color)
+        np_color = np.clip(np_color, 0, 1)
+        mesh = trimesh.Trimesh(vertices=vertices, faces=triangles, vertex_colors=np_color)
+
+    # Rotate 180 degrees along +Y if saving as GLB; mutate mesh.vertices, since
+    # trimesh copies the input array and flipping `vertices` here would be a no-op
    if save_glb_path.endswith(".glb"):
-        # rotate 180 along +Y
-        vertices[:, [0, 2]] = -vertices[:, [0, 2]]
+        mesh.vertices[:, [0, 2]] = -mesh.vertices[:, [0, 2]]

-    if apply_sRGB_to_LinearRGB:
-        np_color = srgb_to_linear(np_color)
-    assert vertices.shape[0] == np_color.shape[0]
-    assert np_color.shape[1] == 3
-    assert 0 <= np_color.min() and np_color.max() <= 1.001, f"min={np_color.min()}, max={np_color.max()}"
-    np_color = np.clip(np_color, 0, 1)
-    mesh = trimesh.Trimesh(vertices=vertices, faces=triangles, vertex_colors=np_color)
+    # Remove unreferenced vertices
    mesh.remove_unreferenced_vertices()
-    # save mesh
+
+    # Save mesh
    mesh.export(save_glb_path)
-    if save_glb_path.endswith(".glb"):
-        fix_vert_color_glb(save_glb_path)
-    print(f"saving to {save_glb_path}")
+    # if save_glb_path.endswith(".glb"):
+    #     fix_vert_color_glb(save_glb_path)
+    print(f"Saving to {save_glb_path}")


def calc_horizontal_offset(target_img, source_img):
@@ -124,43 +155,7 @@ def get_distract_mask(color_0, color_1, normal_0=None, normal_1=None, thres=0.25
    max_x, max_y = bbox.max(axis=0)
    distract_bbox[min_x:max_x, min_y:max_y] = 1

-    points = np.array(random_sampled_points)[:, ::-1]
-    labels = np.ones(len(points), dtype=np.int32)
-
-    masks = generator.generate((color_1 * 255).astype(np.uint8))
-
-    outside_area = np.abs(color_0 - color_1).sum(axis=-1) < outside_thres
-
-    final_mask = np.zeros_like(distract_mask)
-    for iii, mask in enumerate(masks):
-        mask['segmentation'] = cv2.resize(mask['segmentation'].astype(np.float32), (1024, 1024)) > 0.5
-        intersection = np.logical_and(mask['segmentation'], distract_mask).sum()
-        total = mask['segmentation'].sum()
-        iou = intersection / total
-        outside_intersection = np.logical_and(mask['segmentation'], outside_area).sum()
-        outside_total = mask['segmentation'].sum()
-        outside_iou = outside_intersection / outside_total
-        if iou > ratio and outside_iou < outside_ratio:
-            final_mask |= mask['segmentation']
-
-    # calculate coverage
-    intersection = np.logical_and(final_mask, distract_mask).sum()
-    total = distract_mask.sum()
-    coverage = intersection / total
-
-    if coverage < 0.8:
-        # use original distract mask
-        final_mask = (distract_mask.copy() * 255).astype(np.uint8)
-        final_mask = cv2.dilate(final_mask, np.ones((3, 3), np.uint8), iterations=3)
-        labeled_array_dilate, num_features_dilate = scipy.ndimage.label(final_mask)
-        for i in range(num_features_dilate + 1):
-            if np.sum(labeled_array_dilate == i) < 200:
-                final_mask[labeled_array_dilate == i] = 255
-
-        final_mask = cv2.erode(final_mask, np.ones((3, 3), np.uint8), iterations=3)
-        final_mask = final_mask > 127
-
-    return distract_mask, distract_bbox, random_sampled_points, final_mask
+    return distract_mask, distract_bbox, None, None  # SAM refinement disabled; keep the 4-tuple shape


if __name__ == '__main__':
@@ -172,6 +167,9 @@ if __name__ == '__main__':
    parser.add_argument('--no_decompose', action='store_true')
    args = parser.parse_args()

+    import time
+    start_time = time.time()
+
    for test_idx in os.listdir(args.input_mv_dir):
        mv_root_dir = os.path.join(args.input_mv_dir, test_idx)
        obj_dir = os.path.join(args.input_obj_dir, test_idx)
@@ -228,7 +226,7 @@ if __name__ == '__main__':
            normals.append(normal)

            if last_front_color is not None and level == 0:
-                original_mask, distract_bbox, _, distract_mask = get_distract_mask(last_front_color, np.array(colors[0]).astype(np.float32) / 255.0, outside_ratio=args.outside_ratio)
+                distract_mask, distract_bbox, _, _ = get_distract_mask(last_front_color, np.array(colors[0]).astype(np.float32) / 255.0, outside_ratio=args.outside_ratio)
                cv2.imwrite(f'{args.output_dir}/{test_idx}/distract_mask.png', distract_mask.astype(np.uint8) * 255)
            else:
                distract_mask = None
@@ -275,7 +273,7 @@ if __name__ == '__main__':
            # my mesh flow weight by nearest vertices
            try:
                if fixed_v is not None and fixed_f is not None and level != 0:
-                    new_mesh_v = new_mesh.verts_packed().cpu().numpy()
+                    new_mesh_v = new_mesh.vertices.copy()

                    fixed_v_cpu = fixed_v.cpu().numpy()
                    kdtree_anchor = KDTree(fixed_v_cpu)
@@ -297,14 +295,13 @@ if __name__ == '__main__':
                    weighted_vec_anchor = (vec_anchor * neighbor_weights[:, :, None]).sum(1)  # V, 3
                    new_mesh_v += weighted_vec_anchor.cpu().numpy()

-                    # replace new_mesh verts with new_mesh_v
-                    new_mesh = Meshes(verts=[torch.tensor(new_mesh_v, device='cuda')], faces=new_mesh.faces_list(), textures=new_mesh.textures)
+                    new_mesh.vertices = new_mesh_v

            except Exception as e:
                pass

            os.makedirs(f'{args.output_dir}/{test_idx}', exist_ok=True)
-            save_py3dmesh_with_trimesh_fast(new_mesh, f'{args.output_dir}/{test_idx}/out_{level}.glb', apply_sRGB_to_LinearRGB=False)
+            new_mesh.export(f'{args.output_dir}/{test_idx}/out_{level}.glb')

            if fixed_v is None:
                fixed_v, fixed_f = simp_v, simp_f
@@ -312,6 +309,10 @@ if __name__ == '__main__':
                fixed_f = torch.cat([fixed_f, simp_f + fixed_v.shape[0]], dim=0)
                fixed_v = torch.cat([fixed_v, simp_v], dim=0)

+            # input("Press Enter to continue...")
+
+            print('finish', time.time() - start_time)
+

        else:
            mesh = trimesh.load(obj_dir + f'_0.obj')
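The rewritten save_py3dmesh_with_trimesh_fast above routes TexturesUV meshes through trimesh's texture pipeline. A minimal standalone sketch of that trimesh path, using a unit quad with a random texture (all names here are illustrative, not from the repository):

import numpy as np
import trimesh
from PIL import Image

# A unit quad (two triangles) with per-vertex UV coordinates.
vertices = np.array([[0., 0., 0.], [1., 0., 0.], [1., 1., 0.], [0., 1., 0.]])
faces = np.array([[0, 1, 2], [0, 2, 3]])
uv = np.array([[0., 0.], [1., 0.], [1., 1.], [0., 1.]])

# trimesh's material/GLB export path expects the texture as a PIL image.
texture_image = Image.fromarray((np.random.rand(64, 64, 3) * 255).astype(np.uint8))
material = trimesh.visual.texture.SimpleMaterial(image=texture_image)

mesh = trimesh.Trimesh(
    vertices=vertices,
    faces=faces,
    visual=trimesh.visual.TextureVisuals(uv=uv, image=texture_image, material=material),
)
mesh.export('textured_quad.glb')  # GLB embeds the texture

This is also why the save function converts the float texture map to a PIL image before building SimpleMaterial: trimesh wants per-vertex UVs plus an image, and has no direct use for the per-face faces_uvs indices that pytorch3d provides.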
pre-requirements.txt CHANGED
@@ -23,3 +23,4 @@ scikit-learn
pygltflib
pymeshlab==2022.2.post3
pytorch_lightning
+imagehash