"""Gradio app that restyles an uploaded face image as a "Soyjak".

An SDXL text-to-image pipeline is loaded once at import time with two
IP-Adapters (a style adapter and a face adapter).  Each request feeds a fixed
style reference image plus the uploaded face image to the pipeline.
"""

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from PIL import Image
from torchvision import transforms  # noqa: F401  (no longer used here; kept so other code relying on it keeps working)

# Initialize the pipeline
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
)

# Configure the scheduler for the pipeline
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Load both IP-Adapters.  The "*_vit-h" SDXL weights were trained against the
# ViT-H image encoder, which lives under "models/image_encoder" in the
# h94/IP-Adapter repository rather than under "sdxl_models/image_encoder",
# so the encoder folder has to be pointed there explicitly.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
    image_encoder_folder="models/image_encoder",
)
# One scale per adapter, in the same order as ``weight_name`` above.
pipeline.set_ip_adapter_scale([0.7, 0.5])

# Define the desired size for the images
desired_size = (1024, 1024)

# Style reference image fed to the first (style) IP-Adapter.
STYLE_IMAGE_PATH = "examples/soyjak2.jpg"  # Ensure this path is correct


def _to_rgb_image(image):
    """Return *image* as an RGB PIL image resized to ``desired_size``.

    Args:
        image: A ``PIL.Image.Image`` or a ``numpy.ndarray``.

    Returns:
        A new RGB ``PIL.Image.Image`` of size ``desired_size``.

    Raises:
        ValueError: If *image* is neither a PIL image nor a NumPy array
            (this includes ``None``, i.e. nothing was uploaded).
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    elif not isinstance(image, Image.Image):
        raise ValueError("Unsupported image format")

    # Convert every non-RGB mode (L, LA, RGBA, P, ...) rather than only "L",
    # so the image encoder always receives a three-channel image.
    if image.mode != "RGB":
        image = image.convert("RGB")
    return image.resize(desired_size, Image.LANCZOS)


def _load_style_image():
    """Load the style reference image as an RGB PIL image of ``desired_size``."""
    # ``convert`` returns a fully loaded copy, so the file handle can be
    # closed as soon as the ``with`` block ends.
    with Image.open(STYLE_IMAGE_PATH) as style_file:
        return _to_rgb_image(style_file.convert("RGB"))


@spaces.GPU
def transform_image(face_image):
    """Generate a Soyjak-styled image from an uploaded face image.

    Args:
        face_image: The uploaded image, as a ``PIL.Image.Image`` or a
            ``numpy.ndarray``.

    Returns:
        The first image produced by the pipeline (a PIL image with the
        pipeline's default output type), which Gradio can display directly.

    Raises:
        ValueError: If *face_image* is not a PIL image or NumPy array.
    """
    # Validate and prepare the inputs before touching the GPU so that bad
    # input fails fast.
    processed_face_image = _to_rgb_image(face_image)
    style_image = _load_style_image()

    # Move the pipeline to the GPU inside the function
    pipeline.to("cuda")
    try:
        # Fixed seed: the same input image always yields the same output.
        generator = torch.Generator(device="cuda").manual_seed(0)

        # One entry per loaded IP-Adapter, in load order (style, face).  Both
        # are passed as PIL images so the pipeline's own feature extractor
        # resizes and normalizes them for the image encoder.
        result = pipeline(
            prompt="soyjak",
            ip_adapter_image=[style_image, processed_face_image],
            negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
            num_inference_steps=30,
            generator=generator,
        )
    finally:
        # Move the pipeline back to CPU to release GPU resources, even when
        # inference raised.
        pipeline.to("cpu")

    # ``images`` already holds PIL images, so no tensor conversion is needed.
    return result.images[0]


# Gradio interface setup
demo = gr.Interface(
    fn=transform_image,
    inputs=gr.Image(label="Upload your face image"),
    outputs=gr.Image(label="Your Soyjak"),
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image. Please use responsibly.",
)

demo.queue(max_size=20)
demo.launch()