import os
import sys

import numpy as np
import torch
from einops import rearrange
from huggingface_hub import hf_hub_download
from omegaconf import OmegaConf
import spaces

from .utils import *

parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


class GenMVImage(object):
    def __init__(self, device):
        self.seed = 1024
        self.guidance_scale = 7.5
        self.step = 50
        self.device = device

        # CRM: two-stage pixel diffusion for multi-view generation.
        from .third_party.CRM.pipelines import TwoStagePipeline
        stage1_config = OmegaConf.load(
            f"{parent_dir}/apps/third_party/CRM/configs/nf7_v3_SNR_rd_size_stroke.yaml"
        ).config
        stage1_sampler_config = stage1_config.sampler
        stage1_model_config = stage1_config.models
        stage1_model_config.resume = hf_hub_download(
            repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth", repo_type="model"
        )
        stage1_model_config.config = (
            f"{parent_dir}/apps/third_party/CRM/" + stage1_model_config.config
        )
        self.crm_pipeline = TwoStagePipeline(
            stage1_model_config,
            stage1_sampler_config,
            device=self.device,
            dtype=torch.float16,
        )
        self.crm_pipeline.set_seed(self.seed)

        # Wonder3D: cross-domain (normal + color) multi-view diffusion.
        sys.path.append(f"{parent_dir}/apps/third_party/Wonder3D")
        # Only tested on diffusers[torch]==0.19.3; may conflict with newer versions.
        from diffusers import DiffusionPipeline
        self.wonder3d_pipeline = DiffusionPipeline.from_pretrained(
            "flamehaze1115/wonder3d-v1.0",  # or use local checkpoint './ckpts'
            custom_pipeline="flamehaze1115/wonder3d-pipeline",
            torch_dtype=torch.float16,
        )
        self.wonder3d_pipeline.unet.enable_xformers_memory_efficient_attention()
        self.wonder3d_pipeline.to(self.device)
        self.wonder3d_pipeline.set_progress_bar_config(disable=True)

        # MVDream (text-to-multi-view) and ImageDream (image-to-multi-view).
        sys.path.append(f"{parent_dir}/apps/third_party/mvdream_diffusers")
        from .third_party.mvdream_diffusers.pipeline_mvdream import MVDreamPipeline
        self.mvdream_pipeline = MVDreamPipeline.from_pretrained(
            "ashawkey/mvdream-sd2.1-diffusers",  # remote weights
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        self.mvdream_pipeline = self.mvdream_pipeline.to(self.device)
        # Required by the ImageDream branch of gen_image_from_mvdream below.
        self.imagedream_pipeline = MVDreamPipeline.from_pretrained(
            "ashawkey/imagedream-ipmv-diffusers",  # remote weights
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        self.imagedream_pipeline = self.imagedream_pipeline.to(self.device)

    @spaces.GPU
    def gen_image_from_crm(self, image):
        rt_dict = self.crm_pipeline(image, scale=self.guidance_scale, step=self.step)
        mv_imgs = rt_dict["stage1_images"]
        # Select four of the six stage-1 views.
        return mv_imgs[5], mv_imgs[3], mv_imgs[2], mv_imgs[0]

    @spaces.GPU
    def gen_image_from_mvdream(self, image, text):
        if image is None:
            # Text-only input: MVDream.
            mv_imgs = self.mvdream_pipeline(
                text,
                negative_prompt="ugly, deformed, disfigured, poor details, bad anatomy",
                num_inference_steps=self.step,
                guidance_scale=self.guidance_scale,
                generator=torch.Generator(self.device).manual_seed(self.seed),
            )
        else:
            # Image input (text is optional): ImageDream. The previous
            # `elif text is not None` guard left mv_imgs undefined when `run`
            # passed text=None, so this branch now keys on the image instead.
            # Composite the RGBA input onto a white background first.
            image = np.array(image).astype(np.float32) / 255.0
            image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
            mv_imgs = self.imagedream_pipeline(
                text if text is not None else "",
                image,
                negative_prompt="ugly, deformed, disfigured, poor details, bad anatomy",
                num_inference_steps=self.step,
                guidance_scale=self.guidance_scale,
                generator=torch.Generator(self.device).manual_seed(self.seed),
            )
        return mv_imgs[1], mv_imgs[2], mv_imgs[3], mv_imgs[0]
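    # The alpha blend above is the usual "composite over white" step. A
    # minimal standalone sketch of the same operation (this helper is
    # illustrative only and is not referenced elsewhere in the repo):
    @staticmethod
    def composite_on_white(rgba: np.ndarray) -> np.ndarray:
        """rgba: float32 array of shape (H, W, 4) in [0, 1] -> (H, W, 3) RGB on white."""
        rgb, alpha = rgba[..., :3], rgba[..., 3:4]
        return rgb * alpha + (1.0 - alpha)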
    @spaces.GPU
    def gen_image_from_wonder3d(self, image, crop_size):
        weight_dtype = torch.float16
        batch = prepare_data(image, crop_size)

        generator = torch.Generator(device=self.wonder3d_pipeline.unet.device).manual_seed(self.seed)

        # Repeat the inputs once per domain (normal, color): (2B, Nv, 3, H, W).
        imgs_in = torch.cat([batch["imgs_in"]] * 2, dim=0).to(weight_dtype)
        # (2B, Nv, Nce)
        camera_embeddings = torch.cat([batch["camera_embeddings"]] * 2, dim=0).to(weight_dtype)
        task_embeddings = torch.cat(
            [batch["normal_task_embeddings"], batch["color_task_embeddings"]], dim=0
        ).to(weight_dtype)
        camera_embeddings = torch.cat([camera_embeddings, task_embeddings], dim=-1).to(weight_dtype)

        # (B*Nv, 3, H, W); B is already folded into Nv here, so this rearrange
        # is an identity kept for shape bookkeeping.
        imgs_in = rearrange(imgs_in, "Nv C H W -> (Nv) C H W")

        out = self.wonder3d_pipeline(
            imgs_in,
            # camera_embeddings,  # handled internally by the custom pipeline
            generator=generator,
            guidance_scale=self.guidance_scale,
            num_inference_steps=self.step,
            output_type="pt",
            num_images_per_prompt=1,
            **{"eta": 1.0},
        ).images

        # The first half of the batch holds normal maps, the second half colors.
        bsz = out.shape[0] // 2
        normals_pred = out[:bsz]
        images_pred = out[bsz:]
        normals_pred = [save_image(normals_pred[i]) for i in range(bsz)]
        images_pred = [save_image(images_pred[i]) for i in range(bsz)]
        mv_imgs = images_pred
        # Select four of the six generated color views.
        return mv_imgs[0], mv_imgs[2], mv_imgs[4], mv_imgs[5]

    @spaces.GPU
    def run(self, mvimg_model, text, image, crop_size, seed, guidance_scale, step):
        self.seed = seed
        self.guidance_scale = guidance_scale
        self.step = step
        if mvimg_model.upper() == "CRM":
            return self.gen_image_from_crm(image)
        elif mvimg_model.upper() == "IMAGEDREAM":
            # Pass the text prompt through; it is optional for ImageDream.
            return self.gen_image_from_mvdream(image, text)
        elif mvimg_model.upper() == "WONDER3D":
            return self.gen_image_from_wonder3d(image, crop_size)
        else:
            raise ValueError(f"Unsupported multi-view image model: {mvimg_model}")
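
# A minimal usage sketch (commented out because this module uses relative
# imports and is meant to be imported from the apps package, not executed
# directly). Assumes a CUDA device with enough VRAM for all pipelines and
# that the selected pipeline returns PIL images; "input.png" and
# crop_size=192 are placeholder values:
#
#   from PIL import Image
#
#   gen_mv = GenMVImage("cuda")
#   front, right, back, left = gen_mv.run(
#       "CRM", text=None, image=Image.open("input.png"),
#       crop_size=192, seed=1024, guidance_scale=7.5, step=50,
#   )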