import gradio as gr
import numpy as np
import os
import matplotlib.pyplot as plt
from utils.parse import filter_boxes, parse_input_with_negative, show_boxes
from generation import run as run_ours
from baseline import run as run_baseline
import torch
from shared import DEFAULT_SO_NEGATIVE_PROMPT, DEFAULT_OVERALL_NEGATIVE_PROMPT
from examples import stage1_examples, stage2_examples, default_template, simplified_prompt, prompt_placeholder, layout_placeholder
# Probe the CUDA runtime once and reuse the answer for the log line and the
# memory check below.
cuda_available = torch.cuda.is_available()
print(f"Is CUDA available: {cuda_available}")

# Without a GPU there is no memory budget to check, so low-memory mode stays off.
low_memory = False
if cuda_available:
    _device_index = torch.cuda.current_device()
    gpu_memory = torch.cuda.get_device_properties(_device_index).total_memory
    # Devices with at most 16 GiB of total memory are flagged as low-memory.
    low_memory = gpu_memory <= 16 * 1024 ** 3
    print(f"CUDA device: {torch.cuda.get_device_name(_device_index)}. With GPU memory: {gpu_memory}. Low memory: {low_memory}")

# Low-memory devices default to fewer diffusion steps.
default_num_inference_steps = 20 if low_memory else 50
cache_examples = True
def get_lmd_prompt(prompt, template=default_template):
    """Fill ``simplified_prompt`` with a template and a user prompt.

    Args:
        prompt: User prompt; the empty string is replaced by
            ``prompt_placeholder``.
        template: Template text; the empty string is replaced by
            ``default_template``.

    Returns:
        The result of ``simplified_prompt.format(template=..., prompt=...)``.
    """
    # Only the exact empty string triggers a fallback; other values pass through.
    chosen_prompt = prompt_placeholder if prompt == "" else prompt
    chosen_template = default_template if template == "" else template
    return simplified_prompt.format(template=chosen_template, prompt=chosen_prompt)
def get_layout_image(response):
    """Render the layout described by ``response`` into an RGB pixel array.

    The response is parsed with ``parse_input_with_negative`` and drawn with
    ``show_boxes`` right after a fresh 8x8 inch Matplotlib figure is created;
    the figure is then rasterized and closed.

    Args:
        response: Layout text to parse. The empty string is replaced by
            ``layout_placeholder``.

    Returns:
        A ``uint8`` NumPy array of shape ``(height, width, 3)`` containing the
        rendered figure's RGB pixels.
    """
    if response == "":
        response = layout_placeholder
    gen_boxes, bg_prompt, neg_prompt = parse_input_with_negative(response, no_input=True)

    fig = plt.figure(figsize=(8, 8))
    try:
        show_boxes(gen_boxes, bg_prompt, neg_prompt)

        # The figure has not been shown or saved, so it must be drawn
        # explicitly before its pixel buffer can be read.
        canvas = fig.canvas
        canvas.draw()

        if hasattr(canvas, "buffer_rgba"):
            # Preferred path: tostring_rgb() is deprecated since Matplotlib 3.8.
            # Drop the alpha channel and copy so the result does not alias the
            # canvas buffer of a figure that is about to be closed.
            rgba = np.asarray(canvas.buffer_rgba())
            data = np.ascontiguousarray(rgba[..., :3])
        else:
            # Legacy canvases: rebuild (height, width, 3) from the raw RGB bytes.
            width, height = canvas.get_width_height()
            data = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
            data = data.reshape((height, width, 3))
        return data
    finally:
        # plt.clf() only clears the figure; closing it removes it from pyplot's
        # registry so repeated calls do not accumulate open figures.
        plt.close(fig)
def get_layout_image_gallery(response):
    """Return the layout image for ``response`` wrapped in a one-element list."""
    layout_image = get_layout_image(response)
    return [layout_image]
def get_ours_image(response, overall_prompt_override="", seed=0, num_inference_steps=250, dpm_scheduler=True, use_autocast=False, fg_seed_start=20, fg_blending_ratio=0.1, frozen_step_ratio=0.5, attn_guidance_step_ratio=0.6, gligen_scheduled_sampling_beta=0.4, attn_guidance_scale=20, use_ref_ca=True, so_negative_prompt=DEFAULT_SO_NEGATIVE_PROMPT, overall_negative_prompt=DEFAULT_OVERALL_NEGATIVE_PROMPT, show_so_imgs=False, scale_boxes=False):
    """Generate image(s) from a layout response by calling ``run_ours``.

    Args:
        response: Layout text parsed by ``parse_input_with_negative``. When it
            is empty and ``overall_prompt_override`` is also empty,
            ``layout_placeholder`` is used instead.
        overall_prompt_override: Forwarded to ``run_ours`` unchanged.
        seed: Passed to ``run_ours`` as ``bg_seed``.
        num_inference_steps: Forwarded to ``run_ours``; also used to derive the
            overall guidance step limit.
        dpm_scheduler: Selects ``scheduler_key`` ``"dpm_scheduler"`` when true,
            otherwise ``"scheduler"``.
        use_autocast: Forwarded to ``run_ours`` unchanged.
        fg_seed_start: Forwarded to ``run_ours`` unchanged.
        fg_blending_ratio: Forwarded to ``run_ours`` unchanged.
        frozen_step_ratio: Forwarded to ``run_ours`` unchanged.
        attn_guidance_step_ratio: Multiplied by ``num_inference_steps`` and
            truncated with ``int`` to give ``overall_max_index_step``.
        gligen_scheduled_sampling_beta: Passed as both
            ``so_gligen_scheduled_sampling_beta`` and
            ``overall_gligen_scheduled_sampling_beta``.
        attn_guidance_scale: Passed as both ``loss_scale`` and
            ``overall_loss_scale``.
        use_ref_ca: Forwarded to ``run_ours`` unchanged.
        so_negative_prompt: Forwarded to ``run_ours`` unchanged.
        overall_negative_prompt: Forwarded to ``run_ours`` unchanged.
        show_so_imgs: When true, the entries of the second value returned by
            ``run_ours`` are appended to the result as NumPy arrays.
        scale_boxes: Forwarded to ``filter_boxes``.

    Returns:
        A list whose first element is the image returned by ``run_ours``,
        followed by the extra images when ``show_so_imgs`` is true.

    Raises:
        gr.Error: If ``response`` is empty but ``overall_prompt_override`` is
            not.
    """
    if response == "":
        if overall_prompt_override != "":
            raise gr.Error("You entered a prompt for overall image but left the ChatGPT response empty. Please paste ChatGPT response or select an example below to get started.")
        # Both inputs are empty, so generate from the placeholder layout.
        response = layout_placeholder

    gen_boxes, bg_prompt, neg_prompt = parse_input_with_negative(response, no_input=True)
    gen_boxes = filter_boxes(gen_boxes, scale_boxes=scale_boxes)

    spec = {
        # prompt is unused
        'prompt': '',
        'gen_boxes': gen_boxes,
        'bg_prompt': bg_prompt,
        'extra_neg_prompt': neg_prompt,
    }

    scheduler_key = "dpm_scheduler" if dpm_scheduler else "scheduler"
    overall_max_index_step = int(attn_guidance_step_ratio * num_inference_steps)

    # max_index_step is fixed at 0 here; only the overall step limit follows
    # attn_guidance_step_ratio.
    image_np, so_img_list = run_ours(
        spec,
        bg_seed=seed,
        overall_prompt_override=overall_prompt_override,
        fg_seed_start=fg_seed_start,
        fg_blending_ratio=fg_blending_ratio,
        frozen_step_ratio=frozen_step_ratio,
        use_autocast=use_autocast,
        so_gligen_scheduled_sampling_beta=gligen_scheduled_sampling_beta,
        overall_gligen_scheduled_sampling_beta=gligen_scheduled_sampling_beta,
        num_inference_steps=num_inference_steps,
        scheduler_key=scheduler_key,
        use_ref_ca=use_ref_ca,
        so_negative_prompt=so_negative_prompt,
        overall_negative_prompt=overall_negative_prompt,
        loss_scale=attn_guidance_scale,
        max_index_step=0,
        overall_loss_scale=attn_guidance_scale,
        overall_max_index_step=overall_max_index_step,
    )

    images = [image_np]
    if show_so_imgs:
        images.extend(np.asarray(so_img) for so_img in so_img_list)

    if cuda_available:
        print(f"Max GPU memory allocated: {torch.cuda.max_memory_allocated() / 1024 ** 3:.2f} GB")
        # reset_max_memory_allocated() is deprecated and simply delegates to
        # reset_peak_memory_stats(), so call the supported API directly.
        torch.cuda.reset_peak_memory_stats()

    return images
def get_baseline_image(prompt, seed=0):
    """Generate a baseline image for ``prompt`` by calling ``run_baseline``.

    Args:
        prompt: Text prompt; the empty string is replaced by
            ``prompt_placeholder``.
        seed: Passed to ``run_baseline`` as ``bg_seed``.

    Returns:
        A one-element list holding the value returned by ``run_baseline``.
    """
    effective_prompt = prompt_placeholder if prompt == "" else prompt
    # The baseline always runs with the "dpm_scheduler" key and 20 steps.
    baseline_image = run_baseline(
        effective_prompt,
        bg_seed=seed,
        scheduler_key="dpm_scheduler",
        num_inference_steps=20,
    )
    return [baseline_image]
# Extra markup interpolated at the end of tip 5 in the `html` f-string below;
# it is empty here, so nothing is appended.
duplicate_html = ''
html = f"""
Try some examples at the bottom of the page to get started!
Tips:
1. If ChatGPT doesn't generate layout, add/remove the trailing space (added by default) and/or use GPT-4.
2. You can perform multi-round specification by giving ChatGPT follow-up requests (e.g., make the objects bigger or move the objects).
3. You can also try prompts in Simplified Chinese. You need to leave "prompt for overall image" empty in this case. If you want to try prompts in another language, translate the first line of last example to your language.
4. The diffusion model only runs {default_num_inference_steps} steps by default in this demo. You can make it run more steps to get higher quality images (or tweak frozen steps/guidance steps for better guidance and coherence).
5. Duplicate this space and add GPU or clone the space and run locally to skip the queue and run our model faster. (Currently we are using a T4 GPU on this space, which is quite slow, and you can add a A10G to make it 5x faster) {duplicate_html}
An implementation note (updated): In this demo, we provide a few modes: faster generation by disabling attention/per-box guidance. The standard version describes what is implemented for the paper. You can also set GLIGEN guidance steps ratio to 0 to disable GLIGEN and to see what you get with only the original SD weights.