File size: 2,904 Bytes
31772c8
4a2914e
 
31772c8
1bb8a30
6f4ebfe
31772c8
 
4a2914e
 
 
 
 
31772c8
4a2914e
31772c8
4a2914e
31772c8
09d39fb
31772c8
 
 
 
4a2914e
 
31772c8
 
 
 
4a2914e
 
31772c8
4a2914e
31772c8
4a2914e
 
 
 
31772c8
 
 
547d326
 
 
 
31772c8
 
112da9b
547d326
913b2fe
 
 
 
 
 
 
 
 
31772c8
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
import torch
from diffusers import DiffusionPipeline, AutoencoderKL
from PIL import Image
import spaces  

# Load the VAE and the SDXL pipeline once at import time, outside the
# GPU-decorated function, so each request only has to run inference.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
# Apply the sketch-style LoRA weights on top of the base pipeline.
pipe.load_lora_weights("ritwikraha/khabib_sketch_LoRA")

# Move the pipeline to the GPU only when one is actually visible.
if torch.cuda.is_available():
    pipe.to("cuda")

# Define the image generation function
@spaces.GPU(enable_queue=True)
def generate_sketch(
    prompt: str,
    negative_prompt: str = "ugly face, multiple bodies, bad anatomy, disfigured, extra fingers",
    guidance_scale: float = 3,
    num_inference_steps: int = 50,
):
    """Generate a sketch image from a text prompt with the module-level SDXL + LoRA pipeline.

    Args:
        prompt (str): Description of the image to generate.
        negative_prompt (str, optional): Features the generation should steer
            away from. Defaults to a list of common undesirables.
        guidance_scale (float, optional): The strength of the guidance.
            Defaults to 3.
        num_inference_steps (int, optional): The number of steps for the
            diffusion process. Fractional values (e.g. from a UI slider) are
            rounded to the nearest whole number. Defaults to 50.

    Returns:
        PIL.Image: The first generated image, converted to RGB.
    """
    # UI sliders may deliver floats (even fractional ones); the pipeline needs
    # an integer step count, so normalise it here before the call.
    steps = int(round(num_inference_steps))

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=steps,
    )
    return result.images[0].convert("RGB")  # Ensure the image is in RGB format

# Gradio Interface
description = """
This demo utilizes the SDXL model LoRA adaption weights for stabilityai/stable-diffusion-xl-base-1.0. The weights were trained on sketches of Khabib by ritwikraha using DreamBooth.
"""
# Setup Gradio interface: input controls in the first column, the generated
# image in the second.
with gr.Blocks() as demo:
    gr.HTML("<h1><center>Khabib Sketch Maker 🥋</center></h1>")
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your image prompt",
                value="a sketch of TOK khabib dancing, monochrome, pen sketch",
                scale=8,
            )
            negative_prompt_input = gr.Textbox(
                label="Enter negative prompt",
                value="ugly face, multiple bodies, bad anatomy, disfigured, extra fingers",
                lines=2,
            )
            guidance_scale_slider = gr.Slider(label="Guidance Scale", minimum=1, maximum=5, value=3)
            # step=1 restricts the slider to whole numbers: its value is passed
            # on as num_inference_steps, which has to be an integer count.
            steps_slider = gr.Slider(
                label="Number of Inference Steps",
                minimum=20,
                maximum=100,
                value=50,
                step=1,
            )
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_image = gr.Image(label="Generated Sketch")

    # The order of `inputs` must match generate_sketch's positional parameters.
    submit_button.click(
        fn=generate_sketch,
        inputs=[prompt_input, negative_prompt_input, guidance_scale_slider, steps_slider],
        outputs=output_image,
    )

demo.launch()