import gradio as gr
import torch
import numpy as np
import requests
import random
from io import BytesIO
from utils import *
from constants import *
# from inversion_utils import *
# from inversion_utils_dpmplusplus import *
#from modified_pipeline_semantic_stable_diffusion import SemanticStableDiffusionPipeline
from pipeline_semantic_stable_diffusion_img2img_solver import SemanticStableDiffusionImg2ImgPipeline_DPMSolver
from torch import autocast, inference_mode
from diffusers import StableDiffusionPipeline
from diffusers.schedulers import DDIMScheduler
from scheduling_dpmsolver_multistep_inject import DPMSolverMultistepSchedulerInject
from transformers import AutoProcessor, BlipForConditionalGeneration
from share_btn import community_icon_html, loading_icon_html, share_js
# Load the editing pipeline, its scheduler and the captioning model once at import time.
sd_model_id = "runwayml/stable-diffusion-v1-5"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Editing pipeline, loaded in float16 and moved to the selected device.
pipe = SemanticStableDiffusionImg2ImgPipeline_DPMSolver.from_pretrained(
    sd_model_id,
    torch_dtype=torch.float16,
).to(device)
# pipe.scheduler = DDIMScheduler.from_config(sd_model_id, subfolder = "scheduler")
# Replace the default scheduler with the SDE-DPM-Solver++ (order 2) variant.
pipe.scheduler = DPMSolverMultistepSchedulerInject.from_pretrained(
    sd_model_id,
    subfolder="scheduler",
    algorithm_type="sde-dpmsolver++",
    solver_order=2,
)

# BLIP processor/model pair used by caption_image.
blip_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base",
    torch_dtype=torch.float16,
).to(device)
## IMAGE CAPTIONING ##
def caption_image(input_image):
    """Caption ``input_image`` with the module-level BLIP processor and model.

    Returns:
        The decoded caption twice, as a 2-tuple, so a single call can feed
        two Gradio outputs (the upload handler wires it to both the target
        prompt textbox and the ``image_caption`` state).
    """
    model_inputs = blip_processor(images=input_image, return_tensors="pt")
    model_inputs = model_inputs.to(device, torch.float16)
    token_ids = blip_model.generate(
        pixel_values=model_inputs.pixel_values,
        max_length=50,
    )
    captions = blip_processor.batch_decode(token_ids, skip_special_tokens=True)
    caption = captions[0]
    return caption, caption
## DDPM INVERSION AND SAMPLING ##
# def invert(x0, prompt_src="", num_diffusion_steps=100, cfg_scale_src = 3.5, eta = 1):
# # inverts a real image according to Algorithm 1 in https://arxiv.org/pdf/2304.06140.pdf,
# # based on the code in https://github.com/inbarhub/DDPM_inversion
# # returns wt, zs, wts:
# # wt - inverted latent
# # wts - intermediate inverted latents
# # zs - noise maps
# sd_pipe.scheduler.set_timesteps(num_diffusion_steps)
# # vae encode image
# with inference_mode():
# w0 = (sd_pipe.vae.encode(x0).latent_dist.mode() * 0.18215)
# # find Zs and wts - forward process
# wt, zs, wts = inversion_forward_process(sd_pipe, w0, etas=eta, prompt=prompt_src, cfg_scale=cfg_scale_src, prog_bar=True, num_inference_steps=num_diffusion_steps)
# return zs, wts
# def sample(zs, wts, prompt_tar="", cfg_scale_tar=15, skip=36, eta = 1):
# # reverse process (via Zs and wT)
# w0, _ = inversion_reverse_process(sd_pipe, xT=wts[skip], etas=eta, prompts=[prompt_tar], cfg_scales=[cfg_scale_tar], prog_bar=True, zs=zs[skip:])
# # vae decode image
# with inference_mode():
# x0_dec = sd_pipe.vae.decode(1 / 0.18215 * w0).sample
# if x0_dec.dim()<4:
# x0_dec = x0_dec[None,:,:,:]
# img = image_grid(x0_dec)
# return img
# def reconstruct(tar_prompt,
# image_caption,
# tar_cfg_scale,
# skip,
# wts, zs,
# do_reconstruction,
# reconstruction,
# reconstruct_button
# ):
# if reconstruct_button == "Hide Reconstruction":
# return reconstruction.value, reconstruction, ddpm_edited_image.update(visible=False), do_reconstruction, "Show Reconstruction"
# else:
# if do_reconstruction:
# if image_caption.lower() == tar_prompt.lower(): # if image caption was not changed, run actual reconstruction
# tar_prompt = ""
# reconstruction_img = sample(zs.value, wts.value, prompt_tar=tar_prompt, skip=skip, cfg_scale_tar=tar_cfg_scale)
# reconstruction = gr.State(value=reconstruction_img)
# do_reconstruction = False
# return reconstruction.value, reconstruction, ddpm_edited_image.update(visible=True), do_reconstruction, "Hide Reconstruction"
def sample(zs, wts, prompt_tar="", cfg_scale_tar=15, skip=36, eta = 1):
    """Run the pipeline from stored inversion results and return the first image.

    Args:
        zs: Object whose ``.value`` is forwarded to the pipeline as ``zs``.
        wts: Object whose ``.value[-1]`` is used as the initial latent.
        prompt_tar: Prompt passed to the pipeline.
        cfg_scale_tar: Guidance scale passed to the pipeline.
        skip: Accepted for interface compatibility; not used in the body.
        eta: Accepted for interface compatibility; not used in the body.

    Returns:
        ``images[0]`` of the pipeline output.
    """
    # Last stored latent, expanded to a batch of one.
    start_latents = wts.value[-1].expand(1, -1, -1, -1)
    result = pipe(
        prompt=prompt_tar,
        init_latents=start_latents,
        guidance_scale=cfg_scale_tar,
        zs=zs.value,
    )
    return result.images[0]
def reconstruct(tar_prompt,
                image_caption,
                tar_cfg_scale,
                skip,
                wts, zs,
                do_reconstruction,
                reconstruction,
                reconstruct_button
                ):
    """Toggle the reconstruction preview, computing it first when it is stale.

    Args:
        tar_prompt: Target prompt text.
        image_caption: Caption stored for the current image; when it equals
            ``tar_prompt`` (case-insensitively) an empty prompt is used.
        tar_cfg_scale: Guidance scale forwarded to ``sample``.
        skip: Forwarded to ``sample``.
        wts, zs: State wrappers forwarded to ``sample``.
        do_reconstruction: True when the cached reconstruction must be recomputed.
        reconstruction: State wrapper holding the cached reconstruction image.
        reconstruct_button: Current label of the toggle button.

    Returns:
        A 5-tuple: the reconstruction image, the reconstruction state, a
        visibility update for ``ddpm_edited_image``, the updated
        ``do_reconstruction`` flag, and the next button label.
    """
    if reconstruct_button == "Hide Reconstruction":
        # Hide the preview; nothing needs to be recomputed.
        return (
            reconstruction.value,
            reconstruction,
            ddpm_edited_image.update(visible=False),
            do_reconstruction,
            "Show Reconstruction",
        )

    if do_reconstruction:
        if image_caption.lower() == tar_prompt.lower():  # if image caption was not changed, run actual reconstruction
            tar_prompt = ""
        # sample() derives the initial latent from wts itself, so no latent
        # is precomputed here.
        reconstruction_img = sample(zs, wts, prompt_tar=tar_prompt, skip=skip, cfg_scale_tar=tar_cfg_scale)
        reconstruction = gr.State(value=reconstruction_img)
        do_reconstruction = False
    return (
        reconstruction.value,
        reconstruction,
        ddpm_edited_image.update(visible=True),
        do_reconstruction,
        "Hide Reconstruction",
    )
def load_and_invert(
        input_image,
        do_inversion,
        seed, randomize_seed,
        wts, zs,
        src_prompt ="",
        # tar_prompt="",
        steps=30,
        src_cfg_scale = 3.5,
        skip=15,
        tar_cfg_scale=15,
        progress=gr.Progress(track_tqdm=True)
):
    """Invert ``input_image`` when required and hide the progress textbox.

    Inversion runs when ``do_inversion`` or ``randomize_seed`` is truthy; a
    fresh seed is drawn first when ``randomize_seed`` is set. ``tar_cfg_scale``
    and ``progress`` are not referenced in the body.

    Returns:
        ``(wts, zs, do_inversion, update)`` where ``wts``/``zs`` are new
        ``gr.State`` wrappers when inversion ran (otherwise the inputs),
        ``do_inversion`` is False after an inversion, and ``update`` hides
        ``inversion_progress``.
    """
    # x0 = load_512(input_image, device=device).to(torch.float16)
    needs_inversion = do_inversion or randomize_seed
    if needs_inversion:
        if randomize_seed:
            seed = randomize_seed_fn()
        seed_everything(seed)
        # invert and retrieve noise maps and latent
        inversion_kwargs = dict(
            image_path=input_image,
            source_prompt=src_prompt,
            source_guidance_scale=src_cfg_scale,
            num_inversion_steps=steps,
            skip=skip,
            eta=1.0,
        )
        zs_tensor, wts_tensor = pipe.invert(**inversion_kwargs)
        wts = gr.State(value=wts_tensor)
        zs = gr.State(value=zs_tensor)
        do_inversion = False

    hide_progress = inversion_progress.update(visible=False)
    return wts, zs, do_inversion, hide_progress
## SEGA ##
def edit(input_image,
         wts, zs,
         tar_prompt,
         image_caption,
         steps,
         skip,
         tar_cfg_scale,
         edit_concept_1,edit_concept_2,edit_concept_3,
         guidnace_scale_1,guidnace_scale_2,guidnace_scale_3,
         warmup_1, warmup_2, warmup_3,
         neg_guidance_1, neg_guidance_2, neg_guidance_3,
         threshold_1, threshold_2, threshold_3,
         do_reconstruction,
         reconstruction,
         # for inversion in case it needs to be re computed (and avoid delay):
         do_inversion,
         seed,
         randomize_seed,
         src_prompt,
         src_cfg_scale,
         mask_type):
    """Produce the edited image, re-running inversion first when needed.

    When at least one edit concept is non-empty, the pipeline is called with
    the semantic-guidance arguments for all three concept slots. Otherwise the
    plain reconstruction is returned, computed via ``sample`` only when
    ``do_reconstruction`` is set.

    Args:
        mask_type: "No mask", "Cross Attention Mask" or "Intersect Mask".
            Any other value is treated as "No mask".
        (remaining arguments mirror the Gradio inputs wired to the run button)

    Returns:
        An 8-tuple: output image, visibility update for ``reconstruct_button``,
        ``do_reconstruction``, ``reconstruction``, ``wts``, ``zs``,
        ``do_inversion`` and a visibility update that shows the share button
        container.
    """
    show_share_button = gr.update(visible=True)

    # Derive both flags directly from the selection so they are always bound,
    # even if an unexpected mask_type is passed in.
    use_cross_attn_mask = mask_type == "Cross Attention Mask"
    use_intersect_mask = mask_type == "Intersect Mask"

    if randomize_seed:
        seed = randomize_seed_fn()
    seed_everything(seed)

    if do_inversion or randomize_seed:
        zs_tensor, wts_tensor = pipe.invert(
            image_path=input_image,
            source_prompt=src_prompt,
            source_guidance_scale=src_cfg_scale,
            num_inversion_steps=steps,
            skip=skip,
            eta=1.0,
        )
        wts = gr.State(value=wts_tensor)
        zs = gr.State(value=zs_tensor)
        do_inversion = False

    if image_caption.lower() == tar_prompt.lower():  # if image caption was not changed, run pure sega
        tar_prompt = ""

    if edit_concept_1 != "" or edit_concept_2 != "" or edit_concept_3 != "":
        editing_args = dict(
            editing_prompt=[edit_concept_1, edit_concept_2, edit_concept_3],
            reverse_editing_direction=[neg_guidance_1, neg_guidance_2, neg_guidance_3],
            edit_warmup_steps=[warmup_1, warmup_2, warmup_3],
            edit_guidance_scale=[guidnace_scale_1, guidnace_scale_2, guidnace_scale_3],
            edit_threshold=[threshold_1, threshold_2, threshold_3],
            edit_momentum_scale=0.3,
            edit_mom_beta=0.6,
            eta=1,
            use_cross_attn_mask=use_cross_attn_mask,
            use_intersect_mask=use_intersect_mask,
        )
        # Last stored latent, expanded to a batch of one.
        latents = wts.value[-1].expand(1, -1, -1, -1)
        sega_out = pipe(prompt=tar_prompt,
                        init_latents=latents,
                        guidance_scale=tar_cfg_scale,
                        zs=zs.value, **editing_args)
        return (sega_out.images[0], reconstruct_button.update(visible=True),
                do_reconstruction, reconstruction, wts, zs, do_inversion,
                show_share_button)

    # if sega concepts were not added, performs regular ddpm sampling
    if do_reconstruction:  # if ddpm sampling wasn't computed
        pure_ddpm_img = sample(zs, wts, prompt_tar=tar_prompt, skip=skip, cfg_scale_tar=tar_cfg_scale)
        reconstruction = gr.State(value=pure_ddpm_img)
        do_reconstruction = False
        return (pure_ddpm_img, reconstruct_button.update(visible=False),
                do_reconstruction, reconstruction, wts, zs, do_inversion,
                show_share_button)
    return (reconstruction.value, reconstruct_button.update(visible=False),
            do_reconstruction, reconstruction, wts, zs, do_inversion,
            show_share_button)
def randomize_seed_fn():
    """Return a random integer seed between 0 and the int32 maximum, inclusive."""
    upper_bound = np.iinfo(np.int32).max
    return random.randint(0, upper_bound)
def seed_everything(seed):
    """Seed torch, torch.cuda, Python's ``random`` and NumPy with ``seed``."""
    # Applied in a fixed order: torch, torch.cuda, random, numpy.
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        random.seed,
        np.random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def crop_image(image):
    """Center-crop ``image`` to a square and resize it to 512x512.

    Args:
        image: Array whose first two axes are height and width. A trailing
            channel axis is optional, so 2-D grayscale arrays are accepted as
            well as H x W x C arrays.

    Returns:
        A new array holding the resized square crop.
    """
    # Only the first two axes matter for cropping; reading shape[:2] avoids
    # failing on arrays that have no channel axis.
    h, w = image.shape[:2]
    side = min(h, w)
    top = (h - side) // 2
    left = (w - side) // 2
    # For the shorter axis the offset is 0 and the slice spans the whole axis.
    image = image[top:top + side, left:left + side]
    return np.array(Image.fromarray(image).resize((512, 512)))
def get_example():
    """Return the example rows shown in the ``gr.Examples`` widget.

    Each row is a flat list of 16 values in this order: input image path,
    concept 1, concept 2, target prompt, expected output image path, guidance
    scales (2), warmup steps (2), remove-concept flags (2), diffusion steps,
    skip steps, target guidance scale, thresholds (2).
    """
    def example_row(source, concepts, target_prompt, result, scales, warmups,
                    removals, steps, skip, cfg_scale, thresholds):
        # Flatten the grouped fields into the positional layout described above.
        return [
            source,
            *concepts,
            target_prompt,
            result,
            *scales,
            *warmups,
            *removals,
            steps,
            skip,
            cfg_scale,
            *thresholds,
        ]

    return [
        example_row(
            'examples/lemons_input.jpg',
            ('apples', 'lemons'),
            'a ceramic bowl',
            'examples/lemons_output.jpg',
            (7, 7),
            (1, 1),
            (False, True),
            50,
            25,
            5,
            (0.95, 0.95),
        ),
        example_row(
            'examples/girl_with_pearl_earring_input.png',
            ('glasses', ''),
            '',
            'examples/girl_with_pearl_earring_output.png',
            (3, 7),
            (3, 2),
            (False, False),
            50,
            25,
            5,
            (0.97, 0.95),
        ),
        example_row(
            'examples/flower_field_input.jpg',
            ('pink tulips', 'red flowers'),
            'van gogh painting',
            'examples/flower_field_output.png',
            (20, 7),
            (1, 1),
            (False, True),
            50,
            25,
            7,
            (0.9, 0.9),
        ),
    ]
def swap_visibilities(input_image,
                      edit_concept_1,
                      edit_concept_2,
                      tar_prompt,
                      sega_edited_image,
                      guidnace_scale_1,
                      guidnace_scale_2,
                      warmup_1,
                      warmup_2,
                      neg_guidance_1,
                      neg_guidance_2,
                      steps,
                      skip,
                      tar_cfg_scale,
                      threshold_1,
                      threshold_2,
                      sega_concepts_counter
                      ):
    """Build the UI updates applied when an example is selected.

    Only the two concepts and their remove-flags influence the result; the
    incoming ``sega_concepts_counter`` is discarded and counting restarts at 0.

    Returns:
        A 14-tuple: one visibility update, the first six items produced by
        ``update_display_concept`` for concept 1, then seven items for
        concept 2 (the last of which is the new counter value).
    """
    sega_concepts_counter = 0

    first_label = "Remove" if neg_guidance_1 else "Add"
    concept1_update = update_display_concept(
        first_label, edit_concept_1, neg_guidance_1, sega_concepts_counter
    )

    if edit_concept_2 == "":
        # No second concept: hand-built updates plus the counter after one concept.
        concept2_update = (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=neg_guidance_2),
            gr.update(visible=True),
            gr.update(visible=False),
            sega_concepts_counter + 1,
        )
    else:
        second_label = "Remove" if neg_guidance_2 else "Add"
        concept2_update = update_display_concept(
            second_label, edit_concept_2, neg_guidance_2, sega_concepts_counter + 1
        )

    # concept 1's trailing counter is dropped; concept 2's is the one reported.
    return (gr.update(visible=True), *concept1_update[:-1], *concept2_update)
########
# demo #
########
# Header text for the page; rendered later through gr.HTML(intro).
intro = """
LEDITS++: Limitless Image Editing using Text-to-Image Models
"""
# Root Blocks app, styled by style.css and bound to `demo`, which is queued
# and launched at the end of the file.
with gr.Blocks(css="style.css") as demo:
def update_counter(sega_concepts_counter, concept1, concept2, concept3):
    """Return the concept counter, recounting only when it is the empty string.

    When ``sega_concepts_counter`` equals ``""`` the result is the number of
    concepts that are not ``''``; any other counter value is returned as is.
    """
    if sega_concepts_counter != "":
        return sega_concepts_counter
    filled = [concept for concept in (concept1, concept2, concept3) if concept != '']
    return len(filled)
def remove_concept(sega_concepts_counter, row_triggered):
    """Build the updates that reset one concept slot after it is removed.

    Args:
        sega_concepts_counter: Current counter; decremented by one.
        row_triggered: 1-based number of the slot being removed.

    Returns:
        A 14-tuple: nine updates resetting the slot's widgets, four row
        visibility updates (exactly one row visible), and the new counter.
    """
    sega_concepts_counter -= 1

    # Show whichever is smaller: the triggered row's index or the new counter.
    visible_index = min(row_triggered - 1, sega_concepts_counter)
    rows_visibility = [gr.update(visible=False) for _ in range(4)]
    rows_visibility[visible_index] = gr.update(visible=True)

    slot_reset = (
        gr.update(visible=False),
        gr.update(visible=False, value="",),
        gr.update(interactive=True, value=""),
        gr.update(visible=False, label="Concept Guidance Scale"),
        gr.update(interactive=True, value=False),
        gr.update(value=DEFAULT_WARMUP_STEPS),
        gr.update(value=DEFAULT_THRESHOLD),
        gr.update(visible=True),
        gr.update(interactive=True, value="custom"),
    )
    return (*slot_reset, *rows_visibility, sega_concepts_counter)
def update_display_concept(button_label, edit_concept, neg_guidance, sega_concepts_counter):
    """Build the updates that display a concept after Add or Remove is pressed.

    A ``button_label`` of ``'Remove'`` forces the negative-guidance flag on
    and switches the slider label to "Negative Guidance Scale".

    Returns:
        A 7-tuple: six component updates followed by the counter plus one.
    """
    removing = button_label == 'Remove'
    if removing:
        neg_guidance = True
    guidance_scale_label = "Negative Guidance Scale" if removing else "Concept Guidance Scale"

    return (
        gr.update(visible=True),  # boxn
        gr.update(visible=True, value=edit_concept),  # concept_n
        gr.update(visible=True, label=guidance_scale_label),  # guidance_scale_n
        gr.update(value=neg_guidance),  # neg_guidance_n
        gr.update(visible=False),  # row_n
        gr.update(visible=True),  # row_n+1
        sega_concepts_counter + 1,
    )
def display_editing_options(run_button, clear_button, sega_tab):
    """Return ``update(visible=True)`` for each of the three components, in order."""
    components = (run_button, clear_button, sega_tab)
    return tuple(component.update(visible=True) for component in components)
def update_interactive_mode(add_button_label):
    """Return two updates that are non-interactive only when the label is "Clear"."""
    is_interactive = add_button_label != "Clear"
    return gr.update(interactive=is_interactive), gr.update(interactive=is_interactive)
def update_dropdown_parms(dropdown):
    """Return the (guidance scale, warmup steps, threshold) preset for an edit type.

    Recognised edit types are 'custom', 'style', 'object' and 'faces'; any
    other value yields ``None``.
    """
    if dropdown == 'custom':
        preset = (DEFAULT_SEGA_CONCEPT_GUIDANCE_SCALE, DEFAULT_WARMUP_STEPS, DEFAULT_THRESHOLD)
    elif dropdown == 'style':
        preset = (STYLE_SEGA_CONCEPT_GUIDANCE_SCALE, STYLE_WARMUP_STEPS, STYLE_THRESHOLD)
    elif dropdown == 'object':
        preset = (OBJECT_SEGA_CONCEPT_GUIDANCE_SCALE, OBJECT_WARMUP_STEPS, OBJECT_THRESHOLD)
    elif dropdown == 'faces':
        preset = (FACE_SEGA_CONCEPT_GUIDANCE_SCALE, FACE_WARMUP_STEPS, FACE_THRESHOLD)
    else:
        preset = None
    return preset
def reset_do_inversion():
    """Return True, the value written to the ``do_inversion`` state to force a new inversion."""
    needs_inversion = True
    return needs_inversion
def reset_do_reconstruction():
    """Return True, the value written to the ``do_reconstruction`` state to force a recompute."""
    return True
def reset_image_caption():
    """Return the empty caption."""
    empty_caption = ""
    return empty_caption
def update_inversion_progress_visibility(input_image, do_inversion):
    """Show the inversion progress box only while an inversion is pending for a loaded image."""
    # bool() keeps the argument a strict True/False, as in the two-branch form.
    show = bool(do_inversion) and input_image is not None
    return inversion_progress.update(visible=show)
def update_edit_progress_visibility(input_image, do_inversion):
    """Return an update that makes the inversion progress box visible.

    Both arguments are accepted but not consulted.
    """
    # if do_inversion and not input_image is None:
    #     return inversion_progress.update(visible=True)
    # else:
    make_visible = inversion_progress.update(visible=True)
    return make_visible
# Page header rendered from the module-level `intro` string.
gr.HTML(intro)
# State holders created without an initial value; the inversion and editing
# callbacks write to them.
wts = gr.State()
zs = gr.State()
reconstruction = gr.State()
# Flags read by the callbacks to decide whether inversion / reconstruction
# has to be (re)computed; both start as True.
do_inversion = gr.State(value=True)
do_reconstruction = gr.State(value=True)
# Counter threaded through the add/remove concept handlers; starts at 0.
sega_concepts_counter = gr.State(0)
# Caption written by caption_image; edit() and reconstruct() compare it with
# the target prompt.
image_caption = gr.State(value="")
# Image row: editable input, hidden reconstruction preview, and edited output.
with gr.Row():
input_image = gr.Image(label="Input Image", interactive=True, elem_id="input_image")
ddpm_edited_image = gr.Image(label=f"Pure DDPM Inversion Image", interactive=False, visible=False)
sega_edited_image = gr.Image(label=f"LEDITS Edited Image", interactive=False, elem_id="output_image")
# All three images are given the same 365x365 display size.
input_image.style(height=365, width=365)
ddpm_edited_image.style(height=365, width=365)
sega_edited_image.style(height=365, width=365)
# Share controls; the outer group starts hidden and is made visible by edit()
# through its last output.
with gr.Group(visible=False) as share_btn_container:
with gr.Group(elem_id="share-btn-container"):
community_icon = gr.HTML(community_icon_html, visible=True)
loading_icon = gr.HTML(loading_icon_html, visible=False)
share_button = gr.Button("Share to community", elem_id="share-btn", visible=True)
# Three initially hidden boxes, one per concept slot. Each holds a button
# whose value shows the concept text, an "x" remove button, and a guidance
# scale slider (range 1-30, step 0.5).
with gr.Row():
with gr.Box(visible=False, elem_id="box1") as box1:
with gr.Row():
concept_1 = gr.Button(scale=3, value="")
remove_concept1 = gr.Button("x", scale=1, min_width=10)
with gr.Row():
guidnace_scale_1 = gr.Slider(label='Concept Guidance Scale', minimum=1, maximum=30,
info="How strongly the concept should modify the image",
value=DEFAULT_SEGA_CONCEPT_GUIDANCE_SCALE,
step=0.5, interactive=True)
with gr.Box(visible=False, elem_id="box2") as box2:
with gr.Row():
concept_2 = gr.Button(scale=3, value="")
remove_concept2 = gr.Button("x", scale=1, min_width=10)
with gr.Row():
guidnace_scale_2 = gr.Slider(label='Concept Guidance Scale', minimum=1, maximum=30,
info="How strongly the concept should modify the image",
value=DEFAULT_SEGA_CONCEPT_GUIDANCE_SCALE,
step=0.5, interactive=True)
with gr.Box(visible=False, elem_id="box3") as box3:
with gr.Row():
concept_3 = gr.Button(scale=3, value="")
remove_concept3 = gr.Button("x", scale=1, min_width=10)
with gr.Row():
guidnace_scale_3 = gr.Slider(label='Concept Guidance Scale', minimum=1, maximum=30,
info="How strongly the concept should modify the image",
value=DEFAULT_SEGA_CONCEPT_GUIDANCE_SCALE,
step=0.5, interactive=True)
# Hidden textbox whose visibility is toggled by the inversion callbacks.
with gr.Row():
inversion_progress = gr.Textbox(visible=False, label="Inversion progress")
# Concept entry form: rows 1-3 each offer a concept textbox, an edit-type
# dropdown, a "Remove Concept?" checkbox in a hidden column, and Add / Remove
# buttons. Only row1 starts visible.
with gr.Box():
intro_segs = gr.Markdown("Add/Remove Concepts from your Image with Semantic Guidance")
# 1st SEGA concept
with gr.Row().style(mobile_collapse=False) as row1:
with gr.Column(scale=3, min_width=100):
with gr.Row().style(mobile_collapse=True):
# with gr.Column(scale=3, min_width=100):
edit_concept_1 = gr.Textbox(
label="Concept",
show_label=True,
max_lines=1, value="",
placeholder="E.g.: Sunglasses",
)
# with gr.Column(scale=2, min_width=100):# better mobile ui
dropdown1 = gr.Dropdown(label = "Edit Type", value ='custom' , choices=['custom','style', 'object', 'faces'])
with gr.Column(scale=1, min_width=100, visible=False):
neg_guidance_1 = gr.Checkbox(
label='Remove Concept?')
with gr.Column(scale=1, min_width=100):
with gr.Row().style(mobile_collapse=False): # better mobile ui
with gr.Column():
add_1 = gr.Button('Add')
remove_1 = gr.Button('Remove')
# 2nd SEGA concept
with gr.Row(visible=False).style(equal_height=True) as row2:
with gr.Column(scale=3, min_width=100):
with gr.Row().style(mobile_collapse=True): #better mobile UI
# with gr.Column(scale=3, min_width=100):
edit_concept_2 = gr.Textbox(
label="Concept",
show_label=True,
max_lines=1,
placeholder="E.g.: Realistic",
)
# with gr.Column(scale=2, min_width=100):# better mobile ui
dropdown2 = gr.Dropdown(label = "Edit Type", value ='custom' , choices=['custom','style', 'object', 'faces'])
with gr.Column(scale=1, min_width=100, visible=False):
neg_guidance_2 = gr.Checkbox(
label='Remove Concept?')
with gr.Column(scale=1, min_width=100):
with gr.Row().style(mobile_collapse=False): # better mobile ui
with gr.Column():
add_2 = gr.Button('Add')
remove_2 = gr.Button('Remove')
# 3rd SEGA concept
with gr.Row(visible=False).style(equal_height=True) as row3:
with gr.Column(scale=3, min_width=100):
with gr.Row().style(mobile_collapse=True): #better mobile UI
# with gr.Column(scale=3, min_width=100):
edit_concept_3 = gr.Textbox(
label="Concept",
show_label=True,
max_lines=1,
placeholder="E.g.: orange",
)
# with gr.Column(scale=2, min_width=100):
dropdown3 = gr.Dropdown(label = "Edit Type", value ='custom' , choices=['custom','style', 'object', 'faces'])
with gr.Column(scale=1, min_width=100, visible=False):
neg_guidance_3 = gr.Checkbox(
label='Remove Concept?',visible=True)
with gr.Column(scale=1, min_width=100):
with gr.Row().style(mobile_collapse=False): # better mobile ui
with gr.Column():
add_3 = gr.Button('Add')
remove_3 = gr.Button('Remove')
# Notice row used in place of a fourth concept row; starts hidden.
with gr.Row(visible=False).style(equal_height=True) as row4:
gr.Markdown("### Max of 3 concepts reached. Remove a concept to add more")
#with gr.Row(visible=False).style(mobile_collapse=False, equal_height=True):
# add_concept_button = gr.Button("+1 concept")
# Optional target prompt; the upload handler fills it with the generated caption.
with gr.Row().style(mobile_collapse=False, equal_height=True):
tar_prompt = gr.Textbox(
label="Describe your edited image (optional)",
elem_id="target_prompt",
# show_label=False,
max_lines=1, value="", scale=3,
placeholder="Target prompt, DDPM Inversion", info = "DDPM Inversion Prompt. Can help with global changes, modify to what you would like to see"
)
# caption_button = gr.Button("Caption Image", scale=1)
# Main action button; wired to edit() further down.
with gr.Row():
run_button = gr.Button("Edit your image!", visible=True)
# Collapsed "Advanced Options" accordion with two tabs: general inversion and
# sampling parameters, and per-concept warmup / threshold sliders.
with gr.Accordion("Advanced Options", open=False):
with gr.Tabs() as tabs:
with gr.TabItem('General options', id=2):
with gr.Row():
with gr.Column(min_width=100):
clear_button = gr.Button("Clear", visible=True)
src_prompt = gr.Textbox(lines=1, label="Source Prompt", interactive=True, placeholder="")
steps = gr.Number(value=50, precision=0, label="Num Diffusion Steps", interactive=True)
src_cfg_scale = gr.Number(value=3.5, label=f"Source Guidance Scale", interactive=True)
mask_type = gr.Radio(choices=["No mask", "Cross Attention Mask", "Intersect Mask"], value="Intersect Mask", label="Mask type")
with gr.Column(min_width=100):
# Starts hidden; edit() shows it after a concept edit has been produced.
reconstruct_button = gr.Button("Show Reconstruction", visible=False)
skip = gr.Slider(minimum=0, maximum=95, value=25, step=1, label="Skip Steps", interactive=True, info = "Percentage of skipped denoising steps. Bigger values increase fidelity to input image")
tar_cfg_scale = gr.Slider(minimum=1, maximum=30,value=7.5, label=f"Guidance Scale", interactive=True)
seed = gr.Number(value=0, precision=0, label="Seed", interactive=True)
randomize_seed = gr.Checkbox(label='Randomize seed', value=False)
with gr.TabItem('SEGA options', id=3) as sega_advanced_tab:
# 1st SEGA concept
gr.Markdown("1st concept")
with gr.Row().style(mobile_collapse=False, equal_height=True):
warmup_1 = gr.Slider(label='Warmup', minimum=0, maximum=50,
value=DEFAULT_WARMUP_STEPS,
step=1, interactive=True, info="At which step to start applying semantic guidance. Bigger values reduce edit concept's effect")
threshold_1 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99,
value=DEFAULT_THRESHOLD, step=0.01, interactive=True,
info = "Lower the threshold for more effect (e.g. ~0.9 for style transfer)")
# 2nd SEGA concept
gr.Markdown("2nd concept")
with gr.Row() as row2_advanced:
warmup_2 = gr.Slider(label='Warmup', minimum=0, maximum=50,
value=DEFAULT_WARMUP_STEPS,
step=1, interactive=True, info="At which step to start applying semantic guidance. Bigger values reduce edit concept's effect")
threshold_2 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99,
value=DEFAULT_THRESHOLD,
step=0.01, interactive=True,
info = "Lower the threshold for more effect (e.g. ~0.9 for style transfer)")
# 3rd SEGA concept
gr.Markdown("3rd concept")
with gr.Row() as row3_advanced:
warmup_3 = gr.Slider(label='Warmup', minimum=0, maximum=50,
value=DEFAULT_WARMUP_STEPS, step=1,
interactive=True, info="At which step to start applying semantic guidance. Bigger values reduce edit concept's effect")
threshold_3 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99,
value=DEFAULT_THRESHOLD, step=0.01,
interactive=True,
info = "Lower the threshold for more effect (e.g. ~0.9 for style transfer)")
# caption_button.click(
# fn = caption_image,
# inputs = [input_image],
# outputs = [tar_prompt]
# )
#neg_guidance_1.change(fn = update_label, inputs=[neg_guidance_1], outputs=[add_1])
#neg_guidance_2.change(fn = update_label, inputs=[neg_guidance_2], outputs=[add_2])
#neg_guidance_3.change(fn = update_label, inputs=[neg_guidance_3], outputs=[add_3])
# "Add" buttons: refresh the counter via update_counter, then display the
# concept box for that slot and swap visibility of the current and next row.
add_1.click(fn=update_counter,
inputs=[sega_concepts_counter,edit_concept_1,edit_concept_2,edit_concept_3],
outputs=sega_concepts_counter,queue=False).then(fn = update_display_concept, inputs=[add_1, edit_concept_1, neg_guidance_1, sega_concepts_counter], outputs=[box1, concept_1, guidnace_scale_1,neg_guidance_1,row1, row2, sega_concepts_counter],queue=False)
add_2.click(fn=update_counter,inputs=[sega_concepts_counter,edit_concept_1,edit_concept_2,edit_concept_3], outputs=sega_concepts_counter,queue=False).then(fn = update_display_concept, inputs=[add_2, edit_concept_2, neg_guidance_2, sega_concepts_counter], outputs=[box2, concept_2, guidnace_scale_2,neg_guidance_2,row2, row3, sega_concepts_counter],queue=False)
add_3.click(fn=update_counter,inputs=[sega_concepts_counter,edit_concept_1,edit_concept_2,edit_concept_3], outputs=sega_concepts_counter,queue=False).then(fn = update_display_concept, inputs=[add_3, edit_concept_3, neg_guidance_3, sega_concepts_counter], outputs=[box3, concept_3, guidnace_scale_3,neg_guidance_3,row3, row4, sega_concepts_counter],queue=False)
# "Remove" buttons call update_display_concept directly (no update_counter
# step); the button itself is passed as the first input, so its 'Remove'
# label selects the negative-guidance branch.
remove_1.click(fn = update_display_concept, inputs=[remove_1, edit_concept_1, neg_guidance_1, sega_concepts_counter], outputs=[box1, concept_1, guidnace_scale_1,neg_guidance_1,row1, row2, sega_concepts_counter],queue=False)
remove_2.click(fn = update_display_concept, inputs=[remove_2, edit_concept_2, neg_guidance_2 ,sega_concepts_counter], outputs=[box2, concept_2, guidnace_scale_2,neg_guidance_2,row2, row3,sega_concepts_counter],queue=False)
remove_3.click(fn = update_display_concept, inputs=[remove_3, edit_concept_3, neg_guidance_3, sega_concepts_counter], outputs=[box3, concept_3, guidnace_scale_3,neg_guidance_3, row3, row4, sega_concepts_counter],queue=False)
# "x" buttons: refresh the counter, then reset the slot through
# remove_concept. gr.State(n) supplies the slot number as row_triggered.
remove_concept1.click(
fn=update_counter,inputs=[sega_concepts_counter,edit_concept_1,edit_concept_2,edit_concept_3], outputs=sega_concepts_counter,queue=False).then(
fn = remove_concept, inputs=[sega_concepts_counter,gr.State(1)], outputs= [box1, concept_1, edit_concept_1, guidnace_scale_1,neg_guidance_1,warmup_1, threshold_1, add_1, dropdown1, row1, row2, row3, row4, sega_concepts_counter],queue=False)
remove_concept2.click(
fn=update_counter,inputs=[sega_concepts_counter,edit_concept_1,edit_concept_2,edit_concept_3], outputs=sega_concepts_counter,queue=False).then(
fn = remove_concept, inputs=[sega_concepts_counter,gr.State(2)], outputs=[box2, concept_2, edit_concept_2, guidnace_scale_2,neg_guidance_2, warmup_2, threshold_2, add_2 , dropdown2, row1, row2, row3, row4, sega_concepts_counter],queue=False)
remove_concept3.click(
fn=update_counter,inputs=[sega_concepts_counter,edit_concept_1,edit_concept_2,edit_concept_3], outputs=sega_concepts_counter,queue=False).then(
fn = remove_concept,inputs=[sega_concepts_counter,gr.State(3)], outputs=[box3, concept_3, edit_concept_3, guidnace_scale_3,neg_guidance_3,warmup_3, threshold_3, add_3, dropdown3, row1, row2, row3, row4, sega_concepts_counter],queue=False)
#add_concept_button.click(fn = update_display_concept, inputs=sega_concepts_counter,
# outputs= [row2, row2_advanced, row3, row3_advanced, add_concept_button, sega_concepts_counter], queue = False)
# Run the edit. The inputs are listed in the same order as edit()'s
# parameters, and the eight outputs match the 8-tuple it returns.
run_button.click(
fn=edit,
inputs=[input_image,
wts, zs,
tar_prompt,
image_caption,
steps,
skip,
tar_cfg_scale,
edit_concept_1,edit_concept_2,edit_concept_3,
guidnace_scale_1,guidnace_scale_2,guidnace_scale_3,
warmup_1, warmup_2, warmup_3,
neg_guidance_1, neg_guidance_2, neg_guidance_3,
threshold_1, threshold_2, threshold_3, do_reconstruction, reconstruction,
do_inversion,
seed,
randomize_seed,
src_prompt,
src_cfg_scale,
mask_type
],
outputs=[sega_edited_image, reconstruct_button, do_reconstruction, reconstruction, wts, zs, do_inversion, share_btn_container])
# .success(fn=update_gallery_display, inputs= [prev_output_image, sega_edited_image], outputs = [gallery, gallery, prev_output_image])
# Any change of the input image marks inversion as stale and writes a fresh
# random value into the seed field.
input_image.change(
fn = reset_do_inversion,
outputs = [do_inversion],
queue = False).then(
fn = randomize_seed_fn,
# inputs = [seed, randomize_seed],
outputs = [seed], queue = False)
# Automatically start inverting upon input_image change
# Upload chain, in order: crop/resize the image, mark inversion stale, draw a
# new seed, caption the image (into tar_prompt and image_caption), update the
# progress box, run load_and_invert, update the progress box again, hide the
# reconstruct button, and mark the reconstruction stale.
input_image.upload(fn = crop_image, inputs = [input_image], outputs = [input_image],queue=False).then(
fn = reset_do_inversion,
outputs = [do_inversion],
queue = False).then(
fn = randomize_seed_fn,
# inputs = [seed, randomize_seed],
outputs = [seed], queue = False).then(fn = caption_image,
inputs = [input_image],
outputs = [tar_prompt, image_caption]).then(fn = update_inversion_progress_visibility, inputs =[input_image,do_inversion],
outputs=[inversion_progress],queue=False).then(
fn=load_and_invert,
inputs=[input_image,
do_inversion,
seed, randomize_seed,
wts, zs,
src_prompt,
# tar_prompt,
steps,
src_cfg_scale,
skip,
tar_cfg_scale,
],
# outputs=[ddpm_edited_image, wts, zs, do_inversion],
outputs=[wts, zs, do_inversion, inversion_progress],
).then(fn = update_inversion_progress_visibility, inputs =[input_image,do_inversion],
outputs=[inversion_progress],queue=False).then(
lambda: reconstruct_button.update(visible=False),
outputs=[reconstruct_button]).then(
fn = reset_do_reconstruction,
outputs = [do_reconstruction],
queue = False)
# Repeat inversion (and reconstruction) when these params are changed:
src_prompt.change(
fn = reset_do_inversion,
outputs = [do_inversion], queue = False).then(
fn = reset_do_reconstruction,
outputs = [do_reconstruction], queue = False)
steps.change(
fn = reset_do_inversion,
outputs = [do_inversion], queue = False).then(
fn = reset_do_reconstruction,
outputs = [do_reconstruction], queue = False)
src_cfg_scale.change(
fn = reset_do_inversion,
outputs = [do_inversion], queue = False).then(
fn = reset_do_reconstruction,
outputs = [do_reconstruction], queue = False)
# Repeat only reconstruction when these params are changed:
tar_prompt.change(
fn = reset_do_reconstruction,
outputs = [do_reconstruction], queue = False)
tar_cfg_scale.change(
fn = reset_do_reconstruction,
outputs = [do_reconstruction], queue = False)
# skip is passed to pipe.invert, so changing it resets inversion as well as
# reconstruction.
skip.change(
fn = reset_do_inversion,
outputs = [do_inversion], queue = False).then(
fn = reset_do_reconstruction,
outputs = [do_reconstruction], queue = False)
# Choosing an edit type loads that type's guidance scale, warmup and
# threshold presets into the slot's sliders.
dropdown1.change(fn=update_dropdown_parms, inputs = [dropdown1], outputs = [guidnace_scale_1,warmup_1, threshold_1], queue=False)
dropdown2.change(fn=update_dropdown_parms, inputs = [dropdown2], outputs = [guidnace_scale_2,warmup_2, threshold_2], queue=False)
dropdown3.change(fn=update_dropdown_parms, inputs = [dropdown3], outputs = [guidnace_scale_3,warmup_3, threshold_3], queue=False)
# Components reset by the "Clear" button. Some appear twice so that the
# parallel value list below can supply both a value and a separate
# visibility/label update for the same component.
clear_components = [input_image,ddpm_edited_image,ddpm_edited_image,sega_edited_image, do_inversion,
src_prompt, steps, src_cfg_scale, seed,
tar_prompt, skip, tar_cfg_scale, reconstruct_button,reconstruct_button,
edit_concept_1, guidnace_scale_1,guidnace_scale_1,warmup_1, threshold_1, neg_guidance_1,dropdown1, concept_1, concept_1, row1,
edit_concept_2, guidnace_scale_2,guidnace_scale_2,warmup_2, threshold_2, neg_guidance_2,dropdown2, concept_2, concept_2, row2,
edit_concept_3, guidnace_scale_3,guidnace_scale_3,warmup_3, threshold_3, neg_guidance_3,dropdown3, concept_3,concept_3, row3,
row4,sega_concepts_counter, box1, box2, box3 ]
# Reset values, position-for-position with clear_components. The list is
# built once here and the same object is returned on every click.
clear_components_output_vals = [None, None,ddpm_edited_image.update(visible=False), None, True,
"", DEFAULT_DIFFUSION_STEPS, DEFAULT_SOURCE_GUIDANCE_SCALE, DEFAULT_SEED,
"", DEFAULT_SKIP_STEPS, DEFAULT_TARGET_GUIDANCE_SCALE, reconstruct_button.update(value="Show Reconstruction"),reconstruct_button.update(visible=False),
"", DEFAULT_SEGA_CONCEPT_GUIDANCE_SCALE,guidnace_scale_1.update(visible=False), DEFAULT_WARMUP_STEPS, DEFAULT_THRESHOLD, DEFAULT_NEGATIVE_GUIDANCE, "custom","", concept_1.update(visible=False), row1.update(visible=True),
"", DEFAULT_SEGA_CONCEPT_GUIDANCE_SCALE,guidnace_scale_2.update(visible=False), DEFAULT_WARMUP_STEPS, DEFAULT_THRESHOLD, DEFAULT_NEGATIVE_GUIDANCE, "custom","", concept_2.update(visible=False), row2.update(visible=False),
"", DEFAULT_SEGA_CONCEPT_GUIDANCE_SCALE,guidnace_scale_3.update(visible=False), DEFAULT_WARMUP_STEPS, DEFAULT_THRESHOLD, DEFAULT_NEGATIVE_GUIDANCE, "custom","",concept_3.update(visible=False), row3.update(visible=False), row4.update(visible=False), gr.update(value=0),
box1.update(visible=False), box2.update(visible=False), box3.update(visible=False)]
clear_button.click(lambda: clear_components_output_vals, outputs =clear_components)
# Show the reconstruction image slot first, then let reconstruct() compute or
# toggle it. ddpm_edited_image appears twice in the outputs: once for the
# image value and once for the visibility update.
reconstruct_button.click(lambda: ddpm_edited_image.update(visible=True), outputs=[ddpm_edited_image]).then(fn = reconstruct,
inputs = [tar_prompt,
image_caption,
tar_cfg_scale,
skip,
wts, zs,
do_reconstruction,
reconstruction,
reconstruct_button],
outputs = [ddpm_edited_image,reconstruction, ddpm_edited_image, do_reconstruction, reconstruct_button])
# Toggling the checkbox (in either direction) writes a new random seed.
randomize_seed.change(
fn = randomize_seed_fn,
# inputs = [seed, randomize_seed],
outputs = [seed],
queue = False)
# Sharing is handled entirely by the client-side script in share_js.
share_button.click(None, [], [], _js=share_js)
# Example gallery. Selecting an example runs swap_visibilities, whose
# 14-tuple result is mapped onto the 14 outputs listed below.
# NOTE(review): each row from get_example() holds 16 values while 17 inputs
# are listed here (sega_concepts_counter has no example value) - confirm how
# the installed Gradio version handles the missing column.
gr.Examples(
label='Examples',
fn=swap_visibilities,
run_on_click=True,
examples=get_example(),
inputs=[input_image,
edit_concept_1,
edit_concept_2,
tar_prompt,
sega_edited_image,
guidnace_scale_1,
guidnace_scale_2,
warmup_1,
warmup_2,
neg_guidance_1,
neg_guidance_2,
steps,
skip,
tar_cfg_scale,
threshold_1,
threshold_2,
sega_concepts_counter
],
outputs=[share_btn_container, box1, concept_1, guidnace_scale_1,neg_guidance_1, row1, row2,box2, concept_2, guidnace_scale_2,neg_guidance_2,row2, row3,sega_concepts_counter],
cache_examples=True
)
# Enable the request queue, then start the app.
demo.queue()
demo.launch()
# demo.launch(share=True)