# Source: 3d_animation_toolkit/launch/image_generation.py
# Last commit by abreza (4e92ab0):
#   "refactor: Update image generation pipeline to use playground v2.5"
# Hosting-page metadata: no virus detected; file size 4.26 kB.
import os
import gradio as gr
import rembg
import spaces
import torch
from diffusers import DiffusionPipeline
from src.utils.infer_util import (remove_background, resize_foreground)
# Load the Playground v2.5 text-to-image pipeline at import time, requesting
# the "fp16" weight variant with float16 tensors, then move it to the CUDA
# device so generate_image() can call it directly.
pipe = DiffusionPipeline.from_pretrained(
    "playgroundai/playground-v2.5-1024px-aesthetic",
    variant="fp16",
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")
def generate_prompt(subject, style, color_scheme, angle, lighting_type, additional_details):
    """Build the text-to-image prompt for a full-body 3D cartoon render.

    Args:
        subject: What to render (inserted verbatim, twice, into the prompt).
        style: Rendering style, e.g. ``'Pixar-like'``.
        color_scheme: Colour palette description, e.g. ``'Vibrant'``.
        angle: Viewing angle, e.g. ``'Front'``.
        lighting_type: Lighting description, e.g. ``'Bright and Even'``.
        additional_details: Optional free-form extra sentence. May be empty,
            whitespace-only or ``None``, in which case it is omitted entirely.

    Returns:
        The complete prompt string.
    """
    # The details field is optional in the UI. Emitting it unconditionally
    # would leave a stray " ." sentence in the prompt when it is blank, and a
    # doubled terminator when the user already ended it with punctuation.
    details = (additional_details or "").strip()
    if details and not details.endswith((".", "!", "?")):
        details += "."
    details_sentence = f"{details} " if details else ""

    return (
        f"A 3D cartoon render of {subject}, featuring the entire body and shape, "
        "on a transparent background. "
        f"The style should be {style}, with {color_scheme} colors, "
        "emphasizing the essential features and lines. "
        f"The pose should clearly showcase the full form of the {subject} "
        f"from a {angle} perspective. "
        f"Lighting is {lighting_type}, highlighting the volume and depth of the subject. "
        f"{details_sentence}"
        "Output as a high-resolution PNG with no background."
    )
@spaces.GPU
def generate_image(subject, style, color_scheme, angle, lighting_type, additional_details):
    """Generate one image from the prompt described by the given options.

    The arguments are forwarded unchanged to ``generate_prompt``; the
    resulting text is run through the module-level ``pipe`` with 25
    inference steps and a guidance scale of 7.5.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    output = pipe(
        generate_prompt(
            subject,
            style,
            color_scheme,
            angle,
            lighting_type,
            additional_details,
        ),
        num_inference_steps=25,
        guidance_scale=7.5,
    )
    return output.images[0]
def check_input_image(input_image):
    """Validate that an input image has been provided.

    Args:
        input_image: The value of the input image component.

    Raises:
        gr.Error: If ``input_image`` is ``None``.
    """
    if input_image is not None:
        return
    raise gr.Error("No image selected!")
# Lazily created rembg session shared by every preprocess() call.
_REMBG_SESSION = None


def _get_rembg_session():
    """Return the shared rembg session, creating it on first use."""
    global _REMBG_SESSION
    if _REMBG_SESSION is None:
        _REMBG_SESSION = rembg.new_session()
    return _REMBG_SESSION


def preprocess(input_image):
    """Remove the background from an image and resize its foreground.

    Args:
        input_image: Image to process; passed straight to the project
            helper ``remove_background`` (presumably a PIL image, since the
            UI components use ``type="pil"`` -- confirm against the helper).

    Returns:
        The result of ``resize_foreground`` applied to the background-removed
        image.
    """
    # Reuse one session instead of building a new one per call: creating a
    # session is the expensive part and does not depend on the input image.
    session = _get_rembg_session()
    without_background = remove_background(input_image, session)
    # 0.85 is forwarded unchanged to resize_foreground; presumably the
    # fraction of the frame the foreground should occupy -- TODO confirm.
    return resize_foreground(without_background, 0.85)
def image_generation_ui():
    """Build the image-generation UI section and wire up its events.

    Creates the prompt controls, the input/processed image panes, the
    "Remove Background" button and an examples gallery fed from the
    ``examples`` directory, then connects:

    * "Generate Image": ``generate_image`` -> input image, and on success
      ``preprocess`` -> processed image.
    * "Remove Background": ``check_input_image``, and on success
      ``preprocess`` -> processed image.

    Presumably meant to be called inside an active ``gr.Blocks`` context --
    confirm against the caller.

    Returns:
        A tuple ``(input_image, processed_image)`` of the two image components.
    """

    def _single_choice(label, choices):
        # Every dropdown is single-select, same width, defaulting to its
        # first choice.
        return gr.Dropdown(
            label=label,
            choices=choices,
            value=choices[0],
            multiselect=False,
            scale=2,
        )

    # Prompt controls, created in display order.
    with gr.Row():
        subject = gr.Textbox(label="Subject", scale=2)
        style = _single_choice(
            "Style",
            ["Pixar-like", "Disney-esque", "Anime-inspired"],
        )
        color_scheme = _single_choice(
            "Color Scheme",
            ["Vibrant", "Pastel", "Monochromatic", "Black and White"],
        )
        angle = _single_choice(
            "Angle",
            ["Front", "Side", "Three-quarter"],
        )
        lighting_type = _single_choice(
            "Lighting Type",
            ["Bright and Even", "Dramatic Shadows", "Soft and Warm"],
        )
        additional_details = gr.Textbox(label="Additional Details", scale=2)
        submit_prompt = gr.Button("Generate Image", scale=1, variant="primary")

    # Image panes, background-removal button and examples gallery.
    with gr.Row(variant="panel"):
        with gr.Column():
            with gr.Row():
                input_image = gr.Image(
                    label="Input Image",
                    image_mode="RGBA",
                    sources="upload",
                    type="pil",
                    elem_id="content_image",
                )
                processed_image = gr.Image(
                    label="Processed Image",
                    image_mode="RGBA",
                    type="pil",
                    interactive=False,
                )
            with gr.Row():
                submit_process = gr.Button(
                    "Remove Background",
                    elem_id="process",
                    variant="primary",
                )
            with gr.Row(variant="panel"):
                example_paths = [
                    os.path.join("examples", file_name)
                    for file_name in sorted(os.listdir("examples"))
                ]
                gr.Examples(
                    examples=example_paths,
                    inputs=[input_image],
                    label="Examples",
                    cache_examples=False,
                    examples_per_page=16,
                )

    # Generate an image from the prompt, then preprocess it if that succeeded.
    prompt_inputs = [
        subject,
        style,
        color_scheme,
        angle,
        lighting_type,
        additional_details,
    ]
    generated = submit_prompt.click(
        fn=generate_image,
        inputs=prompt_inputs,
        outputs=input_image,
    )
    generated.success(
        fn=preprocess,
        inputs=[input_image],
        outputs=[processed_image],
    )

    # Validate the current input image, then preprocess it.
    validated = submit_process.click(fn=check_input_image, inputs=[input_image])
    validated.success(
        fn=preprocess,
        inputs=[input_image],
        outputs=[processed_image],
    )

    return input_image, processed_image