import gradio as gr  # pyright: ignore[reportMissingTypeStubs]
import pillow_heif  # pyright: ignore[reportMissingTypeStubs]
import spaces  # pyright: ignore[reportMissingTypeStubs]
import torch
from huggingface_hub import (  # pyright: ignore[reportMissingTypeStubs]
    hf_hub_download,  # pyright: ignore[reportUnknownVariableType]
)
from PIL import Image
from refiners.fluxion.utils import manual_seed, no_grad
from refiners.foundationals.latent_diffusion.stable_diffusion_1 import StableDiffusion_1

pillow_heif.register_heif_opener()  # pyright: ignore[reportUnknownMemberType]
pillow_heif.register_avif_opener()  # pyright: ignore[reportUnknownMemberType]

TITLE = """
# SD1.5 with Refiners
"""

# initialize the model, on the cpu
DEVICE_CPU = torch.device("cpu")
DEVICE_GPU = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float32
model = StableDiffusion_1(device=DEVICE_CPU, dtype=DTYPE)
model.unet.load_from_safetensors(
    tensors_path=hf_hub_download(
        repo_id="refiners/sd15.unet",
        filename="model.safetensors",
        revision="6b01fc610c7465fa79e44c52c4d2eb0ea56821c9",
    )
)
model.lda.load_from_safetensors(
    tensors_path=hf_hub_download(
        repo_id="refiners/sd15.autoencoder",
        filename="model.safetensors",
        revision="7565efe4812d8e14072111ab326b15eea4c908a5",
    )
)
model.clip_text_encoder.load_from_safetensors(
    tensors_path=hf_hub_download(
        repo_id="refiners/sd15.text_encoder",
        filename="model.safetensors",
        revision="1b5023ecf0d646b7403f4ad182b6f0ab6b251fef",
    )
)

# "move" the model to the gpu, this is handled/intercepted by Zero GPU
model.to(device=DEVICE_GPU, dtype=DTYPE)
model.unet.to(device=DEVICE_GPU, dtype=DTYPE)
model.lda.to(device=DEVICE_GPU, dtype=DTYPE)
model.clip_text_encoder.to(device=DEVICE_GPU, dtype=DTYPE)
model.solver.to(device=DEVICE_GPU, dtype=DTYPE)
model.device = DEVICE_GPU
model.dtype = DTYPE


@spaces.GPU
@no_grad()
def process(
    prompt: str,
    negative_prompt: str,
    condition_scale: float,
    num_inference_steps: int,
    seed: int,
) -> Image.Image:
    assert condition_scale >= 0
    assert num_inference_steps > 0
    assert seed >= 0

    # set the seed
    manual_seed(seed)

    # set the number of inference steps (otherwise the slider value is unused)
    model.set_inference_steps(num_inference_steps)

    # compute embeddings
    clip_text_embedding = model.compute_clip_text_embedding(
        text=prompt,
        negative_text=negative_prompt,
    )

    # init latents
    x = model.init_latents(size=(512, 512))

    # denoise latents
    for step in model.steps:
        x = model(
            x,
            step=step,
            clip_text_embedding=clip_text_embedding,
            condition_scale=condition_scale,
        )

    # decode denoised latents
    image = model.lda.latents_to_image(x)

    return image


with gr.Blocks() as demo:
    gr.Markdown(TITLE)
    with gr.Column():
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button(
                value="Run",
                scale=0,
            )
        output_image = gr.Image(
            label="Output Image",
            image_mode="RGB",
            type="pil",
        )
        with gr.Accordion("Advanced Settings", open=True):
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                placeholder="Enter your (optional) negative prompt",
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=100_000,
                value=2,
                step=1,
            )
            condition_scale = gr.Slider(
                label="Condition scale",
                minimum=0,
                maximum=20,
                value=7.5,
                step=0.05,
            )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                value=30,
                step=1,
            )

    run_button.click(
        fn=process,
        inputs=[
            prompt,
            negative_prompt,
            condition_scale,
            num_inference_steps,
            seed,
        ],
        outputs=output_image,
    )
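
    # Note: with `cache_examples=True` and `cache_mode="lazy"` below, each
    # example's output is generated and cached on first request rather than
    # at startup, so launching the Space triggers no up-front GPU work.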
    gr.Examples(  # pyright: ignore[reportUnknownMemberType]
        examples=[
            [
                "a cute cat, detailed high-quality professional image",
                "lowres, bad anatomy, bad hands, cropped, worst quality",
                7.5,
                30,
                2,
            ],
            [
                "a cute dog, detailed high-quality professional image",
                "lowres, bad anatomy, bad hands, cropped, worst quality",
                7.5,
                30,
                2,
            ],
        ],
        inputs=[
            prompt,
            negative_prompt,
            condition_scale,
            num_inference_steps,
            seed,
        ],
        outputs=output_image,
        fn=process,
        cache_examples=True,
        cache_mode="lazy",
        run_on_click=False,
    )

demo.launch()
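
# A minimal sketch of calling the pipeline without the UI, kept as a comment
# for reference. Outside a ZeroGPU Space the `spaces.GPU` decorator has no
# effect, so `process` can be invoked directly; the prompt values are
# illustrative and "output.png" is an assumed filename:
#
#     image = process(
#         prompt="a cute cat, detailed high-quality professional image",
#         negative_prompt="lowres, bad anatomy, bad hands, cropped, worst quality",
#         condition_scale=7.5,
#         num_inference_steps=30,
#         seed=2,
#     )
#     image.save("output.png")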