# InstaSoyjak / app.py
import gradio as gr
import numpy as np
import torch
from PIL import Image
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from transformers import CLIPVisionModelWithProjection
import spaces  # provides the @spaces.GPU decorator used on ZeroGPU Spaces
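
# This Space turns an uploaded face photo into a "soyjak"-style image: an SDXL
# pipeline is conditioned with two IP-Adapters, a general "plus" adapter fed a
# style reference image and a "plus-face" adapter fed the uploaded face.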
# The ViT-H "plus" IP-Adapter weights used below expect the ViT-H CLIP image
# encoder (stored under models/image_encoder in the h94/IP-Adapter repo), so it
# is loaded explicitly and passed to the pipeline.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter",
    subfolder="models/image_encoder",
    torch_dtype=torch.float16,
)

# Initialize the SDXL text-to-image pipeline
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    image_encoder=image_encoder,
    torch_dtype=torch.float16,
)

# Configure the scheduler for the pipeline
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Load two IP-Adapters: one for the overall style, one specialized for faces
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
)
# Scales for the style adapter and the face adapter, respectively
pipeline.set_ip_adapter_scale([0.7, 0.5])
# Define the desired size for the images
desired_size = (1024, 1024)
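# Both the style reference and the uploaded face are resized to this resolution
# before being passed to the pipeline.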
@spaces.GPU
def transform_image(face_image):
    # Move the pipeline to the GPU for the duration of this call
    pipeline.to("cuda")
    generator = torch.Generator(device="cuda").manual_seed(0)

    # Normalize the Gradio input to a PIL image
    if isinstance(face_image, Image.Image):
        processed_face_image = face_image
    elif isinstance(face_image, np.ndarray):
        processed_face_image = Image.fromarray(face_image)
    else:
        raise ValueError("Unsupported image format")

    # Resize the face image to the working resolution
    processed_face_image = processed_face_image.resize(desired_size, Image.LANCZOS)

    # Load the style reference image from the local path and resize it
    style_image_path = "examples/soyjak2.jpeg"  # Ensure this path is correct
    style_image = Image.open(style_image_path).resize(desired_size, Image.LANCZOS)

    # Run the pipeline; ip_adapter_image accepts PIL images directly, and the list
    # order matches the adapter order set above (style adapter first, face adapter second)
    image = pipeline(
        prompt="soyjak",
        ip_adapter_image=[style_image, processed_face_image],
        negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
        num_inference_steps=30,
        generator=generator,
    ).images[0]  # already a PIL image, so no further conversion is needed

    # Move the pipeline back to the CPU to release GPU memory between requests
    pipeline.to("cpu")
    return image
# Gradio interface setup
demo = gr.Interface(
    fn=transform_image,
    inputs=gr.Image(label="Upload your face image"),
    outputs=gr.Image(label="Your Soyjak"),
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image. Please use responsibly.",
)
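
# Enable request queueing (at most 20 pending requests) before launching the app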
demo.queue(max_size=20)
demo.launch()