import random import tempfile import time import gradio as gr import numpy as np import torch from gradio import inputs from diffusers import ( AutoencoderKL, DDIMScheduler, UNet2DConditionModel, ) from modules.model import CrossAttnProcessor, StableDiffusionPipeline, load_lora_attn_procs from torchvision import transforms from transformers import CLIPTokenizer, CLIPTextModel from PIL import Image from pathlib import Path from safetensors.torch import load_file import modules.safe as _ models = [ ("AbyssOrangeMix_Base", "OrangeMix/AbyssOrangeMix2"), ] base_name = "AbyssOrangeMix_Base" base_model = "OrangeMix/AbyssOrangeMix2" samplers_k_diffusion = [ ("Euler a", "sample_euler_ancestral", {}), ("Euler", "sample_euler", {}), ("LMS", "sample_lms", {}), ("Heun", "sample_heun", {}), ("DPM2", "sample_dpm_2", {"discard_next_to_last_sigma": True}), ("DPM2 a", "sample_dpm_2_ancestral", {"discard_next_to_last_sigma": True}), ("DPM++ 2S a", "sample_dpmpp_2s_ancestral", {}), ("DPM++ 2M", "sample_dpmpp_2m", {}), ("DPM++ SDE", "sample_dpmpp_sde", {}), ("DPM fast", "sample_dpm_fast", {}), ("DPM adaptive", "sample_dpm_adaptive", {}), ("LMS Karras", "sample_lms", {"scheduler": "karras"}), ( "DPM2 Karras", "sample_dpm_2", {"scheduler": "karras", "discard_next_to_last_sigma": True}, ), ( "DPM2 a Karras", "sample_dpm_2_ancestral", {"scheduler": "karras", "discard_next_to_last_sigma": True}, ), ("DPM++ 2S a Karras", "sample_dpmpp_2s_ancestral", {"scheduler": "karras"}), ("DPM++ 2M Karras", "sample_dpmpp_2m", {"scheduler": "karras"}), ("DPM++ SDE Karras", "sample_dpmpp_sde", {"scheduler": "karras"}), ] start_time = time.time() scheduler = DDIMScheduler.from_pretrained( base_model, subfolder="scheduler", ) vae = AutoencoderKL.from_pretrained( "stabilityai/sd-vae-ft-ema", torch_dtype=torch.float32 ) text_encoder = CLIPTextModel.from_pretrained( base_model, subfolder="text_encoder", torch_dtype=torch.float32, ) tokenizer = CLIPTokenizer.from_pretrained( base_model, subfolder="tokenizer", torch_dtype=torch.float32, ) unet = UNet2DConditionModel.from_pretrained( base_model, subfolder="unet", torch_dtype=torch.float32, ) pipe = StableDiffusionPipeline( text_encoder=text_encoder, tokenizer=tokenizer, unet=unet, vae=vae, scheduler=scheduler, ) unet.set_attn_processor(CrossAttnProcessor) if torch.cuda.is_available(): pipe = pipe.to("cuda") def get_model_list(): model_available = [] for model in models: if Path(model[1]).is_dir(): model_available.append(model) return model_available unet_cache = dict() def get_model(name): keys = [k[0] for k in models] if name not in unet_cache: if name not in keys: raise ValueError(name) else: unet = UNet2DConditionModel.from_pretrained( models[keys.index(name)][1], subfolder="unet", torch_dtype=torch.float32, ) unet_cache[name] = unet g_unet = unet_cache[name] g_unet.set_attn_processor(None) return g_unet def error_str(error, title="Error"): return ( f"""#### {title} {error}""" if error else "" ) te_base_weight = text_encoder.get_input_embeddings().weight.data.detach().clone() def restore_all(): global te_base_weight, tokenizer text_encoder.get_input_embeddings().weight.data = te_base_weight tokenizer = CLIPTokenizer.from_pretrained( "/root/workspace/storage/models/orangemix", subfolder="tokenizer", torch_dtype=torch.float16, ) def inference( prompt, guidance, steps, width=512, height=512, seed=0, neg_prompt="", state=None, g_strength=0.4, img_input=None, i2i_scale=0.5, hr_enabled=False, hr_method="Latent", hr_scale=1.5, hr_denoise=0.8, sampler="DPM++ 2M Karras", embs=None, model=None, lora_state=None, lora_scale=None, ): global pipe, unet, tokenizer, text_encoder if seed is None or seed == 0: seed = random.randint(0, 2147483647) if torch.cuda.is_available(): generator = torch.Generator("cuda").manual_seed(int(seed)) else: generator = torch.Generator().manual_seed(int(seed)) local_unet = get_model(model) if lora_state is not None and lora_state != "": load_lora_attn_procs(lora_state, local_unet, lora_scale) else: local_unet.set_attn_processor(CrossAttnProcessor()) pipe.setup_unet(local_unet) sampler_name, sampler_opt = None, None for label, funcname, options in samplers_k_diffusion: if label == sampler: sampler_name, sampler_opt = funcname, options if embs is not None and len(embs) > 0: delta_weight = [] for name, file in embs.items(): if str(file).endswith(".pt"): loaded_learned_embeds = torch.load(file, map_location="cpu") else: loaded_learned_embeds = load_file(file, device="cpu") loaded_learned_embeds = loaded_learned_embeds["string_to_param"]["*"] added_length = tokenizer.add_tokens(name) assert added_length == loaded_learned_embeds.shape[0] delta_weight.append(loaded_learned_embeds) delta_weight = torch.cat(delta_weight, dim=0) text_encoder.resize_token_embeddings(len(tokenizer)) text_encoder.get_input_embeddings().weight.data[-delta_weight.shape[0]:] = delta_weight config = { "negative_prompt": neg_prompt, "num_inference_steps": int(steps), "guidance_scale": guidance, "generator": generator, "sampler_name": sampler_name, "sampler_opt": sampler_opt, "pww_state": state, "pww_attn_weight": g_strength, } if img_input is not None: ratio = min(height / img_input.height, width / img_input.width) img_input = img_input.resize( (int(img_input.width * ratio), int(img_input.height * ratio)), Image.LANCZOS ) result = pipe.img2img(prompt, image=img_input, strength=i2i_scale, **config) elif hr_enabled: result = pipe.txt2img( prompt, width=width, height=height, upscale=True, upscale_x=hr_scale, upscale_denoising_strength=hr_denoise, **config, **latent_upscale_modes[hr_method], ) else: result = pipe.txt2img(prompt, width=width, height=height, **config) # restore if embs is not None and len(embs) > 0: restore_all() return gr.Image.update(result[0][0], label=f"Initial Seed: {seed}") color_list = [] def get_color(n): for _ in range(n - len(color_list)): color_list.append(tuple(np.random.random(size=3) * 256)) return color_list def create_mixed_img(current, state, w=512, h=512): w, h = int(w), int(h) image_np = np.full([h, w, 4], 255) colors = get_color(len(state)) idx = 0 for key, item in state.items(): if item["map"] is not None: m = item["map"] < 255 alpha = 150 if current == key: alpha = 200 image_np[m] = colors[idx] + (alpha,) idx += 1 return image_np # width.change(apply_new_res, inputs=[width, height, global_stats], outputs=[global_stats, sp, rendered]) def apply_new_res(w, h, state): w, h = int(w), int(h) for key, item in state.items(): if item["map"] is not None: item["map"] = resize(item["map"], w, h) update_img = gr.Image.update(value=create_mixed_img("", state, w, h)) return state, update_img def detect_text(text, state, width, height): t = text.split(",") new_state = {} for item in t: item = item.strip() if item == "": continue if item in state: new_state[item] = { "map": state[item]["map"], "weight": state[item]["weight"], } else: new_state[item] = { "map": None, "weight": 0.5, } update = gr.Radio.update(choices=[key for key in new_state.keys()], value=None) update_img = gr.update(value=create_mixed_img("", new_state, width, height)) update_sketch = gr.update(value=None, interactive=False) return new_state, update_sketch, update, update_img def resize(img, w, h): trs = transforms.Compose( [ transforms.ToPILImage(), transforms.Resize(min(h, w)), transforms.CenterCrop((h, w)), ] ) result = np.array(trs(img), dtype=np.uint8) return result def switch_canvas(entry, state, width, height): if entry == None: return None, 0.5, create_mixed_img("", state, width, height) return ( gr.update(value=None, interactive=True), gr.update(value=state[entry]["weight"]), create_mixed_img(entry, state, width, height), ) def apply_canvas(selected, draw, state, w, h): w, h = int(w), int(h) state[selected]["map"] = resize(draw, w, h) return state, gr.Image.update(value=create_mixed_img(selected, state, w, h)) def apply_weight(selected, weight, state): state[selected]["weight"] = weight return state # sp2, radio, width, height, global_stats def apply_image(image, selected, w, h, strgength, state): if selected is not None: state[selected] = {"map": resize(image, w, h), "weight": strgength} return state, gr.Image.update(value=create_mixed_img(selected, state, w, h)) # [ti_state, lora_state, ti_vals, lora_vals, uploads] def add_net(files, ti_state, lora_state): if files is None: return ti_state, "", lora_state, None for file in files: item = Path(file.name) stripedname = str(item.stem).strip() if item.suffix == ".pt": state_dict = torch.load(file.name, map_location="cpu") else: state_dict = load_file(file.name, device="cpu") if any("lora" in k for k in state_dict.keys()): lora_state = file.name else: ti_state[stripedname] = file.name return ti_state, lora_state, gr.Text.update(f"{[key for key in ti_state.keys()]}"), gr.Text.update(f"{lora_state}"), gr.Files.update(value=None) # [ti_state, lora_state, ti_vals, lora_vals, uploads] def clean_states(ti_state, lora_state): return dict(), None, gr.Text.update(f""), gr.Text.update(f""), gr.File.update(value=None) latent_upscale_modes = { "Latent": {"upscale_method": "bilinear", "upscale_antialias": False}, "Latent (antialiased)": {"upscale_method": "bilinear", "upscale_antialias": True}, "Latent (bicubic)": {"upscale_method": "bicubic", "upscale_antialias": False}, "Latent (bicubic antialiased)": { "upscale_method": "bicubic", "upscale_antialias": True, }, "Latent (nearest)": {"upscale_method": "nearest", "upscale_antialias": False}, "Latent (nearest-exact)": { "upscale_method": "nearest-exact", "upscale_antialias": False, }, } css = """ .finetuned-diffusion-div div{ display:inline-flex; align-items:center; gap:.8rem; font-size:1.75rem; padding-top:2rem; } .finetuned-diffusion-div div h1{ font-weight:900; margin-bottom:7px } .finetuned-diffusion-div p{ margin-bottom:10px; font-size:94% } .box { float: left; height: 20px; width: 20px; margin-bottom: 15px; border: 1px solid black; clear: both; } a{ text-decoration:underline } .tabs{ margin-top:0; margin-bottom:0 } #gallery{ min-height:20rem } .no-border { border: none !important; } """ with gr.Blocks(css=css) as demo: gr.HTML( f"""
Hso @ nyanko.sketch2img.gradio
Will use the following formula: w = scale * token_weight_martix * log(1 + sigma) * max(qk).