import gradio as gr
import torch
from PIL import Image
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from transformers import CLIPVisionModelWithProjection
import numpy as np
import spaces  # provides the @spaces.GPU decorator for ZeroGPU Spaces

# Load the ViT-H image encoder required by the *_vit-h IP-Adapter weights below
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter",
    subfolder="models/image_encoder",
    torch_dtype=torch.float16,
)

# Initialize the pipeline
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    image_encoder=image_encoder,
)

# Configure the scheduler for the pipeline
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Load IP-Adapter with the specified weights and set the scale for each component
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
)
pipeline.set_ip_adapter_scale([0.7, 0.5])

# Style reference image fed to the first IP-Adapter
# (assumption: a style image named "style.jpg" is bundled with the Space)
style_image = Image.open("style.jpg").convert("RGB")

# Define the desired size for the images
desired_size = (1024, 1024)

@spaces.GPU
def transform_image(face_image):
    # Move the pipeline to the GPU inside the function
    pipeline.to("cuda")
    generator = torch.Generator(device="cuda").manual_seed(0)

    # Process the input face image
    if isinstance(face_image, Image.Image):
        processed_face_image = face_image
    elif isinstance(face_image, np.ndarray):
        processed_face_image = Image.fromarray(face_image)
    else:
        raise ValueError("Unsupported image format")

    # Resize the face image
    processed_face_image = processed_face_image.resize(desired_size, Image.LANCZOS)

    # Perform the transformation using the configured pipeline.
    # The IP-Adapter accepts PIL images directly, one per loaded adapter
    # (style first, then face), so no tensor conversion is needed.
    image = pipeline(
        prompt="soyjak",
        ip_adapter_image=[style_image, processed_face_image],
        negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
        num_inference_steps=30,
        generator=generator,
    ).images[0]

    # Move the pipeline back to CPU after processing to release GPU resources
    pipeline.to("cpu")

    # The pipeline already returns a PIL image
    return image

# Gradio interface setup
demo = gr.Interface(
    fn=transform_image,
    inputs=gr.Image(label="Upload your face image"),
    outputs=gr.Image(label="Your Soyjak"),
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image. Please use responsibly.",
)

demo.queue(max_size=20)
demo.launch()
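
Once the Space is up, the same interface can also be driven programmatically. Below is a minimal sketch using gradio_client (a recent release providing handle_file is assumed); the Space id "username/InstaSoyjak" and the file name "face.jpg" are placeholders, not values taken from the Space itself.

from gradio_client import Client, handle_file

client = Client("username/InstaSoyjak")  # hypothetical Space id
result = client.predict(
    handle_file("face.jpg"),  # hypothetical local face photo
    api_name="/predict",      # default endpoint name for a gr.Interface
)
print(result)  # local path to the generated image

The call blocks until the queued job finishes and returns the path of the file downloaded for the output image component.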