ShoeGenv2 / util /text_img.py
MaxMilan1
changes
a1f69bb
raw
history blame
1.77 kB
import spaces
import rembg
import torch
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
import cv2
import numpy as np
from PIL import Image
import gradio as gr
# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe.to("cuda")
def check_prompt(prompt):
    """Validate that the user supplied a usable text prompt.

    Args:
        prompt: The prompt text coming from the UI; may be ``None``.

    Raises:
        gr.Error: If ``prompt`` is ``None``, empty, or only whitespace.
    """
    # An untouched text box typically submits "" rather than None, so a bare
    # ``is None`` check would let empty prompts through.
    if prompt is None or not prompt.strip():
        raise gr.Error("Please enter a prompt!")
# --- Model setup: runs once, at import time ---------------------------------
# Everything below executes when this module is imported, so importing it
# loads all three checkpoints and needs a CUDA device.

# ControlNet checkpoint "diffusers/controlnet-canny-sdxl-1.0", loaded as
# float16 from safetensors files.
controlnet = ControlNetModel.from_pretrained(
"diffusers/controlnet-canny-sdxl-1.0",
torch_dtype=torch.float16,
use_safetensors=True
)
# Standalone VAE passed to the pipeline in place of its bundled one.
# NOTE(review): the repo name suggests an fp16-safe SDXL VAE - confirm.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)
# SDXL base pipeline assembled with the ControlNet and VAE loaded above.
# `generate_image` calls this module-level `pipe` object.
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
controlnet=controlnet,
vae=vae,
torch_dtype=torch.float16,
use_safetensors=True
)
# Move the whole pipeline onto the GPU.
pipe.to("cuda")
# Function to generate an image from text using diffusion
@spaces.GPU
def generate_image(prompt, negative_prompt, control_image, scale=0.5):
    """Generate an image from a prompt, guided by the edges of a control image.

    The user's prompt is extended with fixed product-shot keywords, the
    control image is converted to a Canny edge map, the module-level ``pipe``
    is run with that edge map as conditioning, and the first resulting image
    is passed through ``rembg.remove``.

    Args:
        prompt: Text describing the desired image.
        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
        control_image: Image whose edges steer the generation; forwarded to
            ``get_canny``.
        scale: Value passed as ``controlnet_conditioning_scale``.

    Returns:
        The result of ``rembg.remove`` applied to the first generated image.

    Raises:
        gr.Error: If ``control_image`` is ``None``.
    """
    # Without this guard a missing image surfaces as an opaque OpenCV error
    # from inside get_canny.
    if control_image is None:
        raise gr.Error("Please provide a control image!")

    style_suffix = "no background, side view, minimalist shot, single shoe, no legs, product photo"
    # Join with ", " so the user's last word does not fuse with the first
    # keyword (previously "red sneaker" became "red sneakerno background").
    # A trailing comma/space in the user text is dropped to avoid ",, ".
    user_prompt = (prompt or "").strip().rstrip(",").rstrip()
    full_prompt = f"{user_prompt}, {style_suffix}" if user_prompt else style_suffix

    canny_image = get_canny(control_image)
    result = pipe(
        full_prompt,
        negative_prompt=negative_prompt,
        image=canny_image,
        controlnet_conditioning_scale=scale,
    )
    # Only the first generated image is used.
    return rembg.remove(result.images[0])
def get_canny(image, low_threshold=100, high_threshold=200):
    """Build a 3-channel Canny edge image from ``image``.

    Args:
        image: Anything ``np.array`` can convert into an array that
            ``cv2.Canny`` accepts (presumably a PIL image or an 8-bit
            array - confirm against callers).
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        A ``PIL.Image.Image`` whose three channels all hold the same edge map.
    """
    pixels = np.array(image)
    edges = cv2.Canny(pixels, low_threshold, high_threshold)
    # cv2.Canny yields a single-channel map; replicate it across three
    # channels so the result is a 3-channel image.
    edges = edges[:, :, None]
    stacked = np.concatenate([edges, edges, edges], axis=2)
    return Image.fromarray(stacked)