# NOTE(review): import order is kept exactly as in the original file;
# `spaces` is presumably meant to be imported before torch - confirm before
# reordering.
import spaces
import rembg
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoPipelineForImage2Image
import cv2
from transformers import pipeline
import numpy as np
from PIL import Image
import gradio as gr

# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe.to("cuda")

# Style hints appended to every user prompt. Kept in one place so the
# text-to-image and image-to-image paths cannot drift apart.
_PROMPT_SUFFIX = (
    "no background, side view, minimalist shot, single shoe, no legs, "
    "product photo"
)

# Normalised depth values below this are treated as background when building
# a normal map, and their gradients are zeroed.
_BACKGROUND_THRESHOLD = 0.4


def check_prompt(prompt):
    """Validate that the user supplied some prompt text.

    Rejects ``None`` as well as empty or whitespace-only input, since none
    of them carries any prompt text.

    Raises:
        gr.Error: If no usable prompt text was supplied.
    """
    if prompt is None or not str(prompt).strip():
        raise gr.Error("Please enter a prompt!")


# Image-to-image pipeline, loaded once at import time.
imagepipe = AutoPipelineForImage2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=torch.float32,
    use_safetensors=True,
)

controlNet_normal = ControlNetModel.from_pretrained(
    "fusing/stable-diffusion-v1-5-controlnet-normal",
    torch_dtype=torch.float16,
)

controlNet_depth = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth",
    torch_dtype=torch.float16,
)

# Maps the `controlnet` argument of generate_txttoimg to a loaded model.
controlNet_MAP = {"Normal": controlNet_normal, "Depth": controlNet_depth}

# vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)


# Function to generate an image from text using diffusion
@spaces.GPU
def generate_txttoimg(prompt, control_image, controlnet):
    """Generate an image from a text prompt, guided by a ControlNet.

    Args:
        prompt: User prompt; the shared style suffix is appended to it.
        control_image: Source image from which the conditioning image
            (normal map or depth map) is derived.
        controlnet: Key into ``controlNet_MAP``; "Normal" or "Depth".

    Returns:
        The generated image after ``rembg.remove`` has been applied.

    Raises:
        KeyError: If ``controlnet`` is not a key of ``controlNet_MAP``.
    """
    # Join with ", " so the suffix does not fuse with the user's last word.
    prompt = f"{prompt}, {_PROMPT_SUFFIX}"

    # NOTE(review): the pipeline is rebuilt on every call; consider caching
    # it, after confirming how that interacts with the spaces.GPU decorator.
    textpipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        controlnet=controlNet_MAP[controlnet],
        torch_dtype=torch.float16,
        safety_checker=None,
    )
    textpipe.to("cuda")

    if controlnet == "Normal":
        control_image = get_normal(control_image)
    elif controlnet == "Depth":
        control_image = get_depth(control_image)

    image = textpipe(prompt, image=control_image).images[0]
    return rembg.remove(image)


@spaces.GPU
def generate_imgtoimg(prompt, init_image, strength=0.5):
    """Generate a variation of ``init_image`` steered by ``prompt``.

    Args:
        prompt: User prompt; the shared style suffix is appended to it.
        init_image: Starting image handed to the image-to-image pipeline.
        strength: Forwarded unchanged as the pipeline's ``strength``.

    Returns:
        The generated image after ``rembg.remove`` has been applied.
    """
    prompt = f"{prompt}, {_PROMPT_SUFFIX}"
    imagepipe.to("cuda")
    image = imagepipe(prompt, image=init_image, strength=strength).images[0]
    return rembg.remove(image)


def get_normal(image) -> Image.Image:
    """Build a normal-map conditioning image from an input image.

    Depth is estimated with the "Intel/dpt-hybrid-midas" model, Sobel
    gradients of that depth give the x/y components, and gradients are
    zeroed wherever the normalised depth is below the background threshold.

    Returns:
        An 8-bit, 3-channel PIL image encoding the unit normal vectors.
    """
    depth_estimator = pipeline("depth-estimation", model="Intel/dpt-hybrid-midas")
    depth = depth_estimator(image)['predicted_depth'][0]
    depth = depth.numpy()

    # Rescale a copy of the depth towards [0, 1] for the background mask.
    image_depth = depth.copy()
    image_depth -= np.min(image_depth)
    depth_range = np.max(image_depth)
    if depth_range > 0:
        # Guard: a perfectly flat depth map would otherwise divide by zero
        # and fill the mask with NaNs.
        image_depth /= depth_range
    background = image_depth < _BACKGROUND_THRESHOLD

    x = cv2.Sobel(depth, cv2.CV_32F, 1, 0, ksize=3)
    x[background] = 0

    y = cv2.Sobel(depth, cv2.CV_32F, 0, 1, ksize=3)
    y[background] = 0

    # Constant z component; it also keeps the vector norm strictly positive.
    z = np.ones_like(x) * np.pi * 2.0

    normals = np.stack([x, y, z], axis=2)
    normals /= np.sum(normals ** 2.0, axis=2, keepdims=True) ** 0.5
    # Map components from [-1, 1] to [0, 255].
    normals = (normals * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
    return Image.fromarray(normals)


def get_depth(image) -> Image.Image:
    """Build a 3-channel depth-map conditioning image from an input image.

    Uses the default model of the transformers "depth-estimation" pipeline
    and repeats its depth output across three channels.
    """
    depth_estimator = pipeline('depth-estimation')
    depth = np.array(depth_estimator(image)['depth'])
    depth = depth[:, :, None]
    depth = np.concatenate([depth, depth, depth], axis=2)
    return Image.fromarray(depth)


# def get_canny(image):
#     image = np.array(image)
#     low_threshold = 100
#     high_threshold = 200
#     image = cv2.Canny(image,low_threshold,high_threshold)
#     image = image[:,:,None]
#     image = np.concatenate([image, image, image], axis=2)
#     canny_image = Image.fromarray(image)
#     return canny_image


def update_image(image):
    """Return ``image`` unchanged."""
    return image