import spaces import torch from diffusers import StableDiffusion3InstructPix2PixPipeline, SD3Transformer2DModel import gradio as gr import PIL.Image import numpy as np from PIL import Image, ImageOps pipe = StableDiffusion3InstructPix2PixPipeline.from_pretrained("BleachNick/SD3_UltraEdit_w_mask", torch_dtype=torch.float16) pipe = pipe.to("cuda") @spaces.GPU(duration=20) def generate(image_mask, prompt, num_inference_steps=50, image_guidance_scale=1.6, guidance_scale=7.5, seed=255): def is_blank_mask(mask_img): # Convert the mask to a numpy array and check if all values are 0 (black/transparent) mask_array = np.array(mask_img.convert('L')) # Convert to luminance to simplify the check return np.all(mask_array == 0) # Set the seed for reproducibility seed = int(seed) generator = torch.manual_seed(seed) img = image_mask["background"].convert("RGB") mask_img = image_mask["layers"][0].getchannel('A').convert("RGB") # Central crop to desired size desired_size = (512, 512) img = ImageOps.fit(img, desired_size, method=Image.LANCZOS, centering=(0.5, 0.5)) mask_img = ImageOps.fit(mask_img, desired_size, method=Image.LANCZOS, centering=(0.5, 0.5)) if is_blank_mask(mask_img): # Create a mask of the same size with all values set to 255 (white) mask_img = PIL.Image.new('RGB', img.size, color=(255, 255, 255)) mask_img = mask_img.convert('RGB') image = pipe( prompt, image=img, mask_img=mask_img, num_inference_steps=num_inference_steps, image_guidance_scale=image_guidance_scale, guidance_scale=guidance_scale, generator=generator ).images[0] return image,mask_img # image_mask_input = gr.ImageMask(label="Input Image", type="pil", brush_color="#000000", elem_id="inputmask", # shape=(512, 512)) image_mask_input = gr.ImageMask(sources='upload',type="pil",label="Input Image: Mask with pen or leave unmasked",transforms=(),layers=False) prompt_input = gr.Textbox(label="Prompt") num_inference_steps_input = gr.Slider(minimum=0, maximum=100, value=50, label="Number of Inference Steps") image_guidance_scale_input = gr.Slider(minimum=0.0, maximum=2.5, value=1.5, label="Image Guidance Scale") guidance_scale_input = gr.Slider(minimum=0.0, maximum=17.5, value=12.5, label="Guidance Scale") seed_input = gr.Textbox(value="255", label="Random Seed") inputs = [image_mask_input, prompt_input, num_inference_steps_input, image_guidance_scale_input, guidance_scale_input, seed_input] outputs = gr.Image(label="Generated Image") # Custom HTML content article_html = """
This interface allows you to generate images based on a given mask and prompt. Use the sliders to adjust the inference steps and guidance scales, and provide a seed for reproducibility.
""" demo = gr.Interface( fn=generate, inputs=inputs, outputs=outputs, article=article_html # Add article parameter ) demo.queue().launch()