|
|
|
|
|
|
|
import os
import gc
import json
import random
from typing import List, Optional

import spaces
import gradio as gr
import torch
import numpy as np
from pydantic import BaseModel
from PIL import Image
from huggingface_hub import ModelCard, hf_hub_download
from safetensors.torch import load_file
from transformers import CLIPFeatureExtractor
from diffusers import (
    FluxPipeline,
    FluxImg2ImgPipeline,
    FluxInpaintPipeline,
    FluxControlNetPipeline,
    StableDiffusionXLPipeline,
    StableDiffusionXLImg2ImgPipeline,
    StableDiffusionXLInpaintPipeline,
    StableDiffusionXLControlNetPipeline,
    StableDiffusionXLControlNetImg2ImgPipeline,
    StableDiffusionXLControlNetInpaintPipeline,
    AutoPipelineForText2Image,
    AutoPipelineForImage2Image,
    AutoPipelineForInpainting,
    DiffusionPipeline,
    AutoencoderKL,
    FluxControlNetModel,
    FluxMultiControlNetModel,
    ControlNetModel,
)
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from diffusers.schedulers import (
    DPMSolverMultistepScheduler,
    DPMSolverSinglestepScheduler,
    KDPM2DiscreteScheduler,
    KDPM2AncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    EulerAncestralDiscreteScheduler,
    HeunDiscreteScheduler,
    LMSDiscreteScheduler,
    DEISMultistepScheduler,
    UniPCMultistepScheduler,
    FlowMatchEulerDiscreteScheduler,
)
from controlnet_aux.processor import Processor
from photomaker import (
    FaceAnalysis2,
    PhotoMakerStableDiffusionXLPipeline,
    PhotoMakerStableDiffusionXLControlNetPipeline,
    analyze_faces,
)
from sd_embed.embedding_funcs import get_weighted_text_embeddings_sdxl, get_weighted_text_embeddings_flux1
|
|
|
|
|
|
|
# Everything below is loaded once at import time: the base text-to-image
# pipelines (FLUX.1-dev and RealVisXL V4.0), the fp16-safe SDXL VAE and
# refiner, the safety checker, the ControlNet weights, the insightface
# detector, and the PhotoMaker-V2 checkpoint.
|
|
|
|
|
def load_sd(): |
|
|
|
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
|
|
|
|
models = [ |
|
{ |
|
"repo_id": "black-forest-labs/FLUX.1-dev", |
|
"loader": "flux", |
|
"compute_type": torch.bfloat16, |
|
}, |
|
{ |
|
"repo_id": "SG161222/RealVisXL_V4.0", |
|
"loader": "xl", |
|
"compute_type": torch.float16, |
|
} |
|
] |
|
|
|
    for model in models:
        try:
            model["pipeline"] = AutoPipelineForText2Image.from_pretrained(
                model['repo_id'],
                torch_dtype=model['compute_type'],
                safety_checker=None,
                variant="fp16",
            )
        except Exception:
            # Not every checkpoint ships fp16-variant weights; fall back to the defaults.
            model["pipeline"] = AutoPipelineForText2Image.from_pretrained(
                model['repo_id'],
                torch_dtype=model['compute_type'],
                safety_checker=None,
            )
        # enable_model_cpu_offload() manages device placement itself; calling
        # .to(device) first would defeat the offloading.
        model["pipeline"].enable_model_cpu_offload()
|
|
|
|
|
|
|
    sdxl_vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(device)
    refiner = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-refiner-1.0",
        vae=sdxl_vae,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    refiner.enable_model_cpu_offload()
|
|
|
|
|
|
|
safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to(device) |
|
    feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")
|
|
|
|
|
|
|
controlnet_models = [ |
|
{ |
|
"repo_id": "xinsir/controlnet-depth-sdxl-1.0", |
|
"name": "depth_xl", |
|
"layers": ["depth"], |
|
"loader": "xl", |
|
"compute_type": torch.float16, |
|
}, |
|
{ |
|
"repo_id": "xinsir/controlnet-canny-sdxl-1.0", |
|
"name": "canny_xl", |
|
"layers": ["canny"], |
|
"loader": "xl", |
|
"compute_type": torch.float16, |
|
}, |
|
{ |
|
"repo_id": "xinsir/controlnet-openpose-sdxl-1.0", |
|
"name": "openpose_xl", |
|
"layers": ["pose"], |
|
"loader": "xl", |
|
"compute_type": torch.float16, |
|
}, |
|
{ |
|
"repo_id": "xinsir/controlnet-scribble-sdxl-1.0", |
|
"name": "scribble_xl", |
|
"layers": ["scribble"], |
|
"loader": "xl", |
|
"compute_type": torch.float16, |
|
}, |
|
{ |
|
"repo_id": "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro", |
|
"name": "flux1_union_pro", |
|
"layers": ["canny_fl", "tile_fl", "depth_fl", "blur_fl", "pose_fl", "gray_fl", "low_quality_fl"], |
|
"loader": "flux-multi", |
|
"compute_type": torch.bfloat16, |
|
} |
|
] |
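    # "xl" entries load a standalone ControlNetModel; the Flux union checkpoint
    # exposes several control layers at once, so it is wrapped in a
    # FluxMultiControlNetModel and the layer is selected later via control_mode.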
|
|
|
for controlnet in controlnet_models: |
|
if controlnet["loader"] == "xl": |
|
controlnet["controlnet"] = ControlNetModel.from_pretrained( |
|
controlnet["repo_id"], |
|
torch_dtype = controlnet['compute_type'] |
|
).to(device) |
|
elif controlnet["loader"] == "flux-multi": |
|
controlnet["controlnet"] = FluxMultiControlNetModel([FluxControlNetModel.from_pretrained( |
|
controlnet["repo_id"], |
|
torch_dtype = controlnet['compute_type'] |
|
).to(device)]) |
|
|
|
|
|
|
|
|
|
face_detector = FaceAnalysis2(providers=['CUDAExecutionProvider'], allowed_modules=['detection', 'recognition']) |
|
face_detector.prepare(ctx_id=0, det_size=(640, 640)) |
|
|
|
|
|
|
|
photomaker_ckpt = hf_hub_download(repo_id="TencentARC/PhotoMaker-V2", filename="photomaker-v2.bin", repo_type="model") |
|
|
|
return device, models, sdxl_vae, refiner, safety_checker, feature_extractor, controlnet_models, face_detector, photomaker_ckpt |
|
|
|
|
|
device, models, sdxl_vae, refiner, safety_checker, feature_extractor, controlnet_models, face_detector, photomaker_ckpt = load_sd() |
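# Request schemas. SDReq covers text-to-image; SDImg2ImgReq adds an init image
# and denoising strength; SDInpaintReq additionally carries a mask. PIL images
# are not native pydantic types, hence arbitrary_types_allowed in each Config.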
|
|
|
|
|
|
|
class ControlNetReq(BaseModel): |
|
controlnets: List[str] |
|
control_images: List[Image.Image] |
|
controlnet_conditioning_scale: List[float] |
|
|
|
class Config: |
|
arbitrary_types_allowed=True |
|
|
|
|
|
class SDReq(BaseModel): |
|
    model: str = "black-forest-labs/FLUX.1-dev"
    prompt: str = ""
    negative_prompt: Optional[str] = ""
|
fast_generation: Optional[bool] = True |
|
loras: Optional[list] = [] |
|
embeddings: Optional[list] = [] |
|
resize_mode: Optional[str] = "resize_and_fill" |
|
    scheduler: Optional[str] = "fm_euler"
|
height: int = 1024 |
|
width: int = 1024 |
|
num_images_per_prompt: int = 1 |
|
num_inference_steps: int = 8 |
|
    guidance_scale: float = 3.5
    clip_skip: Optional[int] = None  # consumed only by the SDXL pipelines
|
seed: Optional[int] = 0 |
|
refiner: bool = False |
|
vae: bool = True |
|
controlnet_config: Optional[ControlNetReq] = None |
|
photomaker_images: Optional[List[Image.Image]] = None |
|
|
|
class Config: |
|
arbitrary_types_allowed=True |
|
|
|
|
|
class SDImg2ImgReq(SDReq): |
|
image: Image.Image |
|
strength: float = 1.0 |
|
|
|
class Config: |
|
arbitrary_types_allowed=True |
|
|
|
|
|
class SDInpaintReq(SDImg2ImgReq): |
|
mask_image: Image.Image |
|
|
|
class Config: |
|
arbitrary_types_allowed=True |
|
|
|
|
|
|
|
def get_controlnet(controlnet_config: ControlNetReq): |
|
control_mode = [] |
|
controlnet = [] |
|
|
|
for m in controlnet_models: |
|
for c in controlnet_config.controlnets: |
|
if c in m["layers"]: |
|
control_mode.append(m["layers"].index(c)) |
|
controlnet.append(m["controlnet"]) |
|
|
|
return controlnet, control_mode |
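# control_mode indices only matter for the Flux union ControlNet, which
# multiplexes several control types through a single checkpoint; the SDXL
# ControlNets are one-model-per-task and never receive them.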
|
|
|
|
|
def get_pipe(request: SDReq | SDImg2ImgReq | SDInpaintReq):
    for m in models:
        if m["repo_id"] == request.model:
            pipeline = m['pipeline']
            controlnet, control_mode = get_controlnet(request.controlnet_config) if request.controlnet_config else (None, None)

            # control_mode is consumed later by the Flux ControlNet call, not by
            # from_pipe(), so keep it out of the forwarded kwargs.
            from_pipe_args = {"pipeline": pipeline}
            if request.controlnet_config:
                from_pipe_args["controlnet"] = controlnet

            pipe_args = {"control_mode": control_mode}

            if not request.photomaker_images:
                # Check the most-derived types first: SDInpaintReq and
                # SDImg2ImgReq are subclasses of SDReq, so testing SDReq first
                # would match every request.
                if isinstance(request, SDInpaintReq):
                    pipe_args['pipeline'] = AutoPipelineForInpainting.from_pipe(**from_pipe_args)
                elif isinstance(request, SDImg2ImgReq):
                    pipe_args['pipeline'] = AutoPipelineForImage2Image.from_pipe(**from_pipe_args)
                elif isinstance(request, SDReq):
                    pipe_args['pipeline'] = AutoPipelineForText2Image.from_pipe(**from_pipe_args)
                else:
                    raise ValueError(f"Unknown request type: {type(request)}")
            else:
                if request.controlnet_config:
                    pipe_args['pipeline'] = PhotoMakerStableDiffusionXLControlNetPipeline.from_pipe(**from_pipe_args)
                else:
                    pipe_args['pipeline'] = PhotoMakerStableDiffusionXLPipeline.from_pipe(**from_pipe_args)

            return pipe_args

    raise ValueError(f"Unknown model: {request.model}")
|
|
|
|
|
def load_scheduler(pipeline, scheduler): |
|
schedulers = { |
|
"dpmpp_2m": (DPMSolverMultistepScheduler, {}), |
|
"dpmpp_2m_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True}), |
|
"dpmpp_2m_sde": (DPMSolverMultistepScheduler, {"algorithm_type": "sde-dpmsolver++"}), |
|
"dpmpp_2m_sde_k": (DPMSolverMultistepScheduler, {"algorithm_type": "sde-dpmsolver++", "use_karras_sigmas": True}), |
|
"dpmpp_sde": (DPMSolverSinglestepScheduler, {}), |
|
"dpmpp_sde_k": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": True}), |
|
"dpm2": (KDPM2DiscreteScheduler, {}), |
|
"dpm2_k": (KDPM2DiscreteScheduler, {"use_karras_sigmas": True}), |
|
"dpm2_a": (KDPM2AncestralDiscreteScheduler, {}), |
|
"dpm2_a_k": (KDPM2AncestralDiscreteScheduler, {"use_karras_sigmas": True}), |
|
"euler": (EulerDiscreteScheduler, {}), |
|
"euler_a": (EulerAncestralDiscreteScheduler, {}), |
|
"heun": (HeunDiscreteScheduler, {}), |
|
"lms": (LMSDiscreteScheduler, {}), |
|
"lms_k": (LMSDiscreteScheduler, {"use_karras_sigmas": True}), |
|
"deis": (DEISMultistepScheduler, {}), |
|
"unipc": (UniPCMultistepScheduler, {}), |
|
"fm_euler": (FlowMatchEulerDiscreteScheduler, {}), |
|
} |
|
scheduler_class, kwargs = schedulers.get(scheduler, (None, {})) |
|
|
|
if scheduler_class is not None: |
|
scheduler = scheduler_class.from_config(pipeline.scheduler.config, **kwargs) |
|
else: |
|
raise ValueError(f"Unknown scheduler: {scheduler}") |
|
|
|
return scheduler |
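# Example (sketch): switch a loaded pipeline to Karras-sigma DPM++ 2M:
#   pipeline.scheduler = load_scheduler(pipeline, "dpmpp_2m_k")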
|
|
|
|
|
def load_loras(pipeline, loras, fast_generation): |
|
for i, lora in enumerate(loras): |
|
pipeline.load_lora_weights(lora['repo_id'], adapter_name=f"lora_{i}") |
|
adapter_names = [f"lora_{i}" for i in range(len(loras))] |
|
adapter_weights = [lora['weight'] for lora in loras] |
|
|
|
    if fast_generation:
        # The pipeline may be a Flux img2img/inpaint/ControlNet variant, so
        # test the whole Flux family, not just FluxPipeline.
        is_flux = isinstance(pipeline, (FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline, FluxControlNetPipeline))
        hyper_lora = hf_hub_download(
            "ByteDance/Hyper-SD",
            "Hyper-FLUX.1-dev-8steps-lora.safetensors" if is_flux else "Hyper-SDXL-2steps-lora.safetensors"
        )
        hyper_weight = 0.125 if is_flux else 1.0
|
pipeline.load_lora_weights(hyper_lora, adapter_name="hyper_lora") |
|
adapter_names.append("hyper_lora") |
|
adapter_weights.append(hyper_weight) |
|
|
|
pipeline.set_adapters(adapter_names, adapter_weights) |
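# set_adapters() activates every loaded LoRA at once with its own weight;
# cleanup() below disables and unloads them again after each generation.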
|
|
|
|
|
def load_xl_embeddings(pipeline, embeddings):
    for embedding in embeddings:
        # hf_hub_download needs an explicit filename; this assumes each embedding
        # entry provides one, e.g. {"repo_id": ..., "filename": ..., "token": ...}.
        state_dict = load_file(hf_hub_download(embedding['repo_id'], embedding['filename']))
        # SDXL has two text encoders, so the embedding is registered with both.
        pipeline.load_textual_inversion(state_dict['clip_g'], token=embedding['token'], text_encoder=pipeline.text_encoder_2, tokenizer=pipeline.tokenizer_2)
        pipeline.load_textual_inversion(state_dict["clip_l"], token=embedding['token'], text_encoder=pipeline.text_encoder, tokenizer=pipeline.tokenizer)
|
|
|
|
|
def resize_images(images: List[Image.Image], height: int, width: int, resize_mode: str):
    # Accept both "resize and fill" (the UI labels) and "resize_and_fill" (the API defaults).
    resize_mode = resize_mode.replace(" ", "_")
    resized = []
    for image in images:
        if resize_mode == "resize_only":
            image = image.resize((width, height))
        elif resize_mode == "crop_and_resize":
            image = image.crop((0, 0, width, height)).resize((width, height))
        elif resize_mode == "resize_and_fill":
            image = image.resize((width, height), Image.Resampling.LANCZOS)
        resized.append(image)

    return resized
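# Example (sketch): normalize user uploads to the generation resolution:
#   control_images = resize_images(control_images, 1024, 1024, "resize_and_fill")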
|
|
|
|
|
def get_controlnet_images(controlnets: List[str], control_images: List[Image.Image], height: int, width: int, resize_mode: str): |
|
response_images = [] |
|
control_images = resize_images(control_images, height, width, resize_mode) |
|
for controlnet, image in zip(controlnets, control_images): |
|
if controlnet == "canny" or controlnet == "canny_xs" or controlnet == "canny_fl": |
|
processor = Processor('canny') |
|
elif controlnet == "depth" or controlnet == "depth_xs" or controlnet == "depth_fl": |
|
processor = Processor('depth_midas') |
|
elif controlnet == "pose" or controlnet == "pose_fl": |
|
processor = Processor('openpose_full') |
|
elif controlnet == "scribble": |
|
processor = Processor('scribble') |
|
else: |
|
raise ValueError(f"Invalid Controlnet: {controlnet}") |
|
|
|
response_images.append(processor(image, to_pil=True)) |
|
|
|
return response_images |
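# The controlnet_aux Processor wraps the annotators (Canny edges, MiDaS depth,
# full OpenPose, scribble), converting each upload into the conditioning map
# its ControlNet expects.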
|
|
|
|
|
def check_image_safety(images: List[Image.Image]):
    safety_checker_input = feature_extractor(images, return_tensors="pt").to(device)
    # The checker returns (possibly blanked images, has_nsfw_concepts flags).
    _, has_nsfw_concepts = safety_checker(
        images=[np.array(image) for image in images],
        clip_input=safety_checker_input.pixel_values.to(device),
    )

    return has_nsfw_concepts
|
|
|
|
|
def get_prompt_attention(pipeline, prompt, negative_prompt):
    if isinstance(pipeline, (FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline, FluxControlNetPipeline)):
        prompt_embeds, pooled_prompt_embeds = get_weighted_text_embeddings_flux1(pipeline, prompt)
        return prompt_embeds, None, pooled_prompt_embeds, None
    elif isinstance(pipeline, (StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline,
                               StableDiffusionXLControlNetPipeline, StableDiffusionXLControlNetImg2ImgPipeline,
                               StableDiffusionXLControlNetInpaintPipeline)):
        prompt_embeds, prompt_neg_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = get_weighted_text_embeddings_sdxl(pipeline, prompt, negative_prompt)
        return prompt_embeds, prompt_neg_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
    else:
        raise ValueError(f"Invalid pipeline type: {type(pipeline)}")
|
|
|
|
|
def get_photomaker_images(photomaker_images: List[Image.Image], height: int, width: int, resize_mode: str):
    image_input_ids = []
    image_id_embeds = []
    photomaker_images = resize_images(photomaker_images, height, width, resize_mode)

    for image in photomaker_images:
        image_input_ids.append(image)
        # insightface expects BGR numpy arrays, so flip the RGB channels.
        img = np.array(image)[:, :, ::-1]
        faces = analyze_faces(face_detector, img)
        if len(faces) > 0:
            image_id_embeds.append(torch.from_numpy(faces[0]['embedding']))
        else:
            raise ValueError("No face detected in the image")

    return image_input_ids, image_id_embeds
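# PhotoMaker consumes both the raw ID images (for its image encoder) and the
# insightface face embeddings extracted above.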
|
|
|
|
|
def cleanup(pipeline, loras = None, embeddings = None): |
|
if loras: |
|
pipeline.disable_lora() |
|
pipeline.unload_lora_weights() |
|
if embeddings: |
|
pipeline.unload_textual_inversion() |
|
gc.collect() |
|
torch.cuda.empty_cache() |
|
|
|
|
|
|
|
def gen_img( |
|
request: SDReq | SDImg2ImgReq | SDInpaintReq |
|
): |
|
pipeline_args = get_pipe(request) |
|
pipeline = pipeline_args['pipeline'] |
|
try: |
|
pipeline.scheduler = load_scheduler(pipeline, request.scheduler) |
|
|
|
load_loras(pipeline, request.loras, request.fast_generation) |
|
load_xl_embeddings(pipeline, request.embeddings) |
|
|
|
        control_images = get_controlnet_images(request.controlnet_config.controlnets, request.controlnet_config.control_images, request.height, request.width, request.resize_mode) if request.controlnet_config else None
        photomaker_images, photomaker_id_embeds = get_photomaker_images(request.photomaker_images, request.height, request.width, request.resize_mode) if request.photomaker_images else (None, None)
|
|
|
positive_prompt_embeds, negative_prompt_embeds, positive_prompt_pooled, negative_prompt_pooled = get_prompt_attention(pipeline, request.prompt, request.negative_prompt) |
|
|
|
|
|
args = { |
|
'prompt_embeds': positive_prompt_embeds, |
|
'pooled_prompt_embeds': positive_prompt_pooled, |
|
'height': request.height, |
|
'width': request.width, |
|
'num_images_per_prompt': request.num_images_per_prompt, |
|
'num_inference_steps': request.num_inference_steps, |
|
'guidance_scale': request.guidance_scale, |
|
            'generator': [
                torch.Generator(device=device).manual_seed(request.seed + i)
                if request.seed not in (None, 0, -1)
                else torch.Generator(device=device).manual_seed(random.randint(0, 2**32 - 1))
                for i in range(request.num_images_per_prompt)
            ],
|
} |
|
|
|
        if isinstance(pipeline, (StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline,
                                 StableDiffusionXLControlNetPipeline, StableDiffusionXLControlNetImg2ImgPipeline, StableDiffusionXLControlNetInpaintPipeline)):
            args['clip_skip'] = request.clip_skip
            args['negative_prompt_embeds'] = negative_prompt_embeds
            args['negative_pooled_prompt_embeds'] = negative_prompt_pooled
|
|
|
        if isinstance(pipeline, FluxControlNetPipeline) and request.controlnet_config:
            args['control_mode'] = pipeline_args['control_mode']
            args['control_image'] = control_images
            args['controlnet_conditioning_scale'] = request.controlnet_config.controlnet_conditioning_scale

        if not isinstance(pipeline, FluxControlNetPipeline) and request.controlnet_config:
            args['controlnet_conditioning_scale'] = request.controlnet_config.controlnet_conditioning_scale

            # Text-to-image ControlNet pipelines take the conditioning image as
            # `image`; the img2img/inpaint variants take it as `control_image`.
            # Subclasses are checked first, since SDImg2ImgReq and SDInpaintReq
            # are also instances of SDReq.
            if isinstance(request, (SDImg2ImgReq, SDInpaintReq)):
                args['control_image'] = control_images
            elif isinstance(request, SDReq):
                args['image'] = control_images
|
|
|
        if request.photomaker_images and isinstance(pipeline, (PhotoMakerStableDiffusionXLPipeline, PhotoMakerStableDiffusionXLControlNetPipeline)):
            args['input_id_images'] = photomaker_images
            args['id_embeds'] = photomaker_id_embeds  # PhotoMaker V2 takes the face embeddings as `id_embeds`
            args['start_merge_step'] = 10
|
|
|
if isinstance(request, SDImg2ImgReq): |
|
args['image'] = resize_images([request.image], request.height, request.width, request.resize_mode) |
|
args['strength'] = request.strength |
|
elif isinstance(request, SDInpaintReq): |
|
args['image'] = resize_images([request.image], request.height, request.width, request.resize_mode) |
|
args['mask_image'] = resize_images([request.mask_image], request.height, request.width, request.resize_mode) |
|
args['strength'] = request.strength |
|
|
|
images = pipeline(**args).images |
|
|
|
        if request.refiner:
            images = refiner(
                prompt=request.prompt,
                num_inference_steps=40,
                denoising_start=0.7,
                image=images,  # already the list of PIL images from the base pass
            ).images
|
|
|
cleanup(pipeline, request.loras, request.embeddings) |
|
|
|
return images |
|
except Exception as e: |
|
cleanup(pipeline, request.loras, request.embeddings) |
|
raise ValueError(f"Error generating image: {e}") from e |
|
|
|
|
|
|
|
css = """ |
|
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap'); |
|
body { |
|
font-family: 'Poppins', sans-serif !important; |
|
} |
|
.center-content { |
|
text-align: center; |
|
max-width: 600px; |
|
margin: 0 auto; |
|
padding: 20px; |
|
} |
|
.center-content h1 { |
|
font-weight: 600; |
|
margin-bottom: 1rem; |
|
} |
|
.center-content p { |
|
margin-bottom: 1.5rem; |
|
} |
|
""" |
|
|
|
|
|
flux_models = ["black-forest-labs/FLUX.1-dev"] |
|
with open("data/images/loras/flux.json", "r") as f: |
|
loras = json.load(f) |
|
|
|
|
|
|
|
def update_fast_generation(model, fast_generation):
    if fast_generation:
        # Hyper-SD presets: low guidance, few steps.
        return (
            gr.update(value=3.5),
            gr.update(value=8)
        )
    else:
        # Always return updates for both outputs, falling back to the slider
        # defaults when fast generation is switched off.
        return (
            gr.update(value=3.5),
            gr.update(value=20)
        )
|
|
|
|
|
def selected_lora_from_gallery(evt: gr.SelectData): |
|
return ( |
|
gr.update( |
|
value=evt.index |
|
) |
|
) |
|
|
|
|
|
def update_selected_lora(custom_lora): |
|
link = custom_lora.split("/") |
|
|
|
if len(link) == 2: |
|
model_card = ModelCard.load(custom_lora) |
|
trigger_word = model_card.data.get("instance_prompt", "") |
|
image_url = f"""https://huggingface.co/{custom_lora}/resolve/main/{model_card.data.get("widget", [{}])[0].get("output", {}).get("url", None)}""" |
|
|
|
custom_lora_info_css = """ |
|
<style> |
|
.custom-lora-info { |
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif; |
|
background: linear-gradient(135deg, #4a90e2, #7b61ff); |
|
color: white; |
|
padding: 16px; |
|
border-radius: 8px; |
|
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); |
|
margin: 16px 0; |
|
} |
|
.custom-lora-header { |
|
font-size: 18px; |
|
font-weight: 600; |
|
margin-bottom: 12px; |
|
} |
|
.custom-lora-content { |
|
display: flex; |
|
align-items: center; |
|
background-color: rgba(255, 255, 255, 0.1); |
|
border-radius: 6px; |
|
padding: 12px; |
|
} |
|
.custom-lora-image { |
|
width: 80px; |
|
height: 80px; |
|
object-fit: cover; |
|
border-radius: 6px; |
|
margin-right: 16px; |
|
} |
|
.custom-lora-text h3 { |
|
margin: 0 0 8px 0; |
|
font-size: 16px; |
|
font-weight: 600; |
|
} |
|
.custom-lora-text small { |
|
font-size: 14px; |
|
opacity: 0.9; |
|
} |
|
.custom-trigger-word { |
|
background-color: rgba(255, 255, 255, 0.2); |
|
padding: 2px 6px; |
|
border-radius: 4px; |
|
font-weight: 600; |
|
} |
|
</style> |
|
""" |
|
|
|
custom_lora_info_html = f""" |
|
<div class="custom-lora-info"> |
|
<div class="custom-lora-header">Custom LoRA: {custom_lora}</div> |
|
<div class="custom-lora-content"> |
|
<img class="custom-lora-image" src="{image_url}" alt="LoRA preview"> |
|
<div class="custom-lora-text"> |
|
<h3>{link[1].replace("-", " ").replace("_", " ")}</h3> |
|
<small>{"Using: <span class='custom-trigger-word'>"+trigger_word+"</span> as the trigger word" if trigger_word else "No trigger word found. If there's a trigger word, include it in your prompt"}</small> |
|
</div> |
|
</div> |
|
</div> |
|
""" |
|
|
|
custom_lora_info_html = f"{custom_lora_info_css}{custom_lora_info_html}" |
|
|
|
return ( |
|
gr.update( |
|
value=custom_lora, |
|
), |
|
gr.update( |
|
value=custom_lora_info_html, |
|
visible=True |
|
) |
|
) |
|
|
|
    else:
        return (
            gr.update(
                value=custom_lora,
            ),
            gr.update(
                value="",
                visible=False
            )
        )
|
|
|
|
|
def add_to_enabled_loras(model, selected_lora, enabled_loras): |
|
lora_data = loras |
|
try: |
|
selected_lora = int(selected_lora) |
|
|
|
        if 0 <= selected_lora < len(lora_data):
            lora_info = lora_data[selected_lora]
|
enabled_loras.append({ |
|
"repo_id": lora_info["repo"], |
|
"trigger_word": lora_info["trigger_word"] |
|
}) |
|
except ValueError: |
|
link = selected_lora.split("/") |
|
if len(link) == 2: |
|
model_card = ModelCard.load(selected_lora) |
|
trigger_word = model_card.data.get("instance_prompt", "") |
|
enabled_loras.append({ |
|
"repo_id": selected_lora, |
|
"trigger_word": trigger_word |
|
}) |
|
|
|
return ( |
|
gr.update( |
|
value="" |
|
), |
|
gr.update( |
|
value="", |
|
visible=False |
|
), |
|
gr.update( |
|
value=enabled_loras |
|
) |
|
) |
|
|
|
|
|
def update_lora_sliders(enabled_loras): |
|
sliders = [] |
|
remove_buttons = [] |
|
|
|
for lora in enabled_loras: |
|
sliders.append( |
|
gr.update( |
|
label=lora.get("repo_id", ""), |
|
info=f"Trigger Word: {lora.get('trigger_word', '')}", |
|
visible=True, |
|
interactive=True |
|
) |
|
) |
|
remove_buttons.append( |
|
gr.update( |
|
visible=True, |
|
interactive=True |
|
) |
|
) |
|
|
|
if len(sliders) < 6: |
|
for i in range(len(sliders), 6): |
|
sliders.append( |
|
gr.update( |
|
visible=False |
|
) |
|
) |
|
remove_buttons.append( |
|
gr.update( |
|
visible=False |
|
) |
|
) |
|
|
|
return *sliders, *remove_buttons |
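# Gradio event handlers must return a fixed number of outputs, so updates are
# always emitted for all six slider/button slots, hiding the unused ones.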
|
|
|
|
|
def remove_from_enabled_loras(enabled_loras, index): |
|
enabled_loras.pop(index) |
|
return ( |
|
gr.update( |
|
value=enabled_loras |
|
) |
|
) |
|
|
|
|
|
@spaces.GPU |
|
def generate_image( |
|
model, prompt, negative_prompt, fast_generation, enabled_loras, |
|
lora_slider_0, lora_slider_1, lora_slider_2, lora_slider_3, lora_slider_4, lora_slider_5, |
|
img2img_image, inpaint_image, canny_image, pose_image, depth_image, |
|
img2img_strength, inpaint_strength, canny_strength, pose_strength, depth_strength, |
|
resize_mode, |
|
scheduler, image_height, image_width, image_num_images_per_prompt, |
|
image_num_inference_steps, image_guidance_scale, image_seed, |
|
refiner, vae |
|
): |
|
base_args = { |
|
"model": model, |
|
"prompt": prompt, |
|
"negative_prompt": negative_prompt, |
|
"fast_generation": fast_generation, |
|
"loras": None, |
|
"resize_mode": resize_mode, |
|
"scheduler": scheduler, |
|
"height": int(image_height), |
|
"width": int(image_width), |
|
"num_images_per_prompt": float(image_num_images_per_prompt), |
|
"num_inference_steps": float(image_num_inference_steps), |
|
"guidance_scale": float(image_guidance_scale), |
|
"seed": int(image_seed), |
|
"refiner": refiner, |
|
"vae": vae, |
|
"controlnet_config": None, |
|
} |
|
base_args = SDReq(**base_args) |
|
|
|
if len(enabled_loras) > 0: |
|
base_args.loras = [] |
|
for enabled_lora, lora_slider in zip(enabled_loras, [lora_slider_0, lora_slider_1, lora_slider_2, lora_slider_3, lora_slider_4, lora_slider_5]): |
|
if enabled_lora.get("repo_id", None): |
|
base_args.loras.append( |
|
{ |
|
"repo_id": enabled_lora["repo_id"], |
|
"weight": lora_slider |
|
} |
|
) |
|
|
|
image = None |
|
mask_image = None |
|
strength = None |
|
|
|
if img2img_image: |
|
image = img2img_image |
|
strength = float(img2img_strength) |
|
|
|
base_args = SDImg2ImgReq( |
|
**base_args.__dict__, |
|
image=image, |
|
strength=strength |
|
) |
|
elif inpaint_image: |
|
image = inpaint_image['background'] if not all(pixel == (0, 0, 0) for pixel in list(inpaint_image['background'].getdata())) else None |
|
mask_image = inpaint_image['layers'][0] if image else None |
|
strength = float(inpaint_strength) |
|
|
|
base_args = SDInpaintReq( |
|
**base_args.__dict__, |
|
image=image, |
|
mask_image=mask_image, |
|
strength=strength |
|
) |
|
elif any([canny_image, pose_image, depth_image]): |
|
base_args.controlnet_config = ControlNetReq( |
|
controlnets=[], |
|
control_images=[], |
|
controlnet_conditioning_scale=[] |
|
) |
|
|
|
if canny_image: |
|
base_args.controlnet_config.controlnets.append("canny_fl") |
|
base_args.controlnet_config.control_images.append(canny_image) |
|
base_args.controlnet_config.controlnet_conditioning_scale.append(float(canny_strength)) |
|
if pose_image: |
|
base_args.controlnet_config.controlnets.append("pose_fl") |
|
base_args.controlnet_config.control_images.append(pose_image) |
|
base_args.controlnet_config.controlnet_conditioning_scale.append(float(pose_strength)) |
|
if depth_image: |
|
base_args.controlnet_config.controlnets.append("depth_fl") |
|
base_args.controlnet_config.control_images.append(depth_image) |
|
base_args.controlnet_config.controlnet_conditioning_scale.append(float(depth_strength)) |
|
else: |
|
base_args = SDReq(**base_args.__dict__) |
|
|
|
images = gen_img(base_args) |
|
|
|
return ( |
|
gr.update( |
|
value=images, |
|
interactive=True |
|
) |
|
) |
|
|
|
|
|
|
|
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo: |
|
|
|
with gr.Column(elem_classes="center-content"): |
|
gr.Markdown(""" |
|
# π AAI: All AI |
|
Unleash your creativity with our multi-modal AI platform. |
|
[![Sync code to HF Space](https://github.com/mantrakp04/aai/actions/workflows/hf-space.yml/badge.svg)](https://github.com/mantrakp04/aai/actions/workflows/hf-space.yml) |
|
""") |
|
|
|
|
|
with gr.Tabs(): |
|
with gr.Tab(label="πΌοΈ Image"): |
|
with gr.Tabs(): |
|
with gr.Tab("Flux"): |
|
""" |
|
Create the image tab for Generative Image Generation Models |
|
|
|
Args: |
|
models: list |
|
A list containing the models repository paths |
|
gap_iol, gap_la, gap_le, gap_eio, gap_io: Optional[List[dict]] |
|
A list of dictionaries containing the title and component for the custom gradio component |
|
Example: |
|
def gr_comp(): |
|
gr.Label("Hello World") |
|
|
|
[ |
|
{ |
|
'title': "Title", |
|
'component': gr_comp() |
|
} |
|
] |
|
loras: list |
|
A list of dictionaries containing the image and title for the Loras Gallery |
|
Generally a loaded json file from the data folder |
|
|
|
""" |
|
def process_gaps(gaps: List[dict]): |
|
for gap in gaps: |
|
with gr.Accordion(gap['title']): |
|
gap['component'] |
|
|
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
with gr.Group() as image_options: |
|
model = gr.Dropdown(label="Models", choices=flux_models, value=flux_models[0], interactive=True) |
|
prompt = gr.Textbox(lines=5, label="Prompt") |
|
negative_prompt = gr.Textbox(label="Negative Prompt") |
|
fast_generation = gr.Checkbox(label="Fast Generation (Hyper-SD) π§ͺ") |
|
|
|
|
|
with gr.Accordion("Loras", open=True): |
|
lora_gallery = gr.Gallery( |
|
label="Gallery", |
|
value=[(lora['image'], lora['title']) for lora in loras], |
|
allow_preview=False, |
|
columns=[3], |
|
type="pil" |
|
) |
|
|
|
with gr.Group(): |
|
with gr.Column(): |
|
with gr.Row(): |
|
custom_lora = gr.Textbox(label="Custom Lora", info="Enter a Huggingface repo path") |
|
selected_lora = gr.Textbox(label="Selected Lora", info="Choose from the gallery or enter a custom LoRA") |
|
|
|
custom_lora_info = gr.HTML(visible=False) |
|
add_lora = gr.Button(value="Add LoRA") |
|
|
|
enabled_loras = gr.State(value=[]) |
|
with gr.Group(): |
|
with gr.Row(): |
|
for i in range(6): |
|
with gr.Column(): |
|
with gr.Column(scale=2): |
|
globals()[f"lora_slider_{i}"] = gr.Slider(label=f"LoRA {i+1}", minimum=0, maximum=1, step=0.01, value=0.8, visible=False, interactive=True) |
|
with gr.Column(): |
|
globals()[f"lora_remove_{i}"] = gr.Button(value="Remove LoRA", visible=False) |
|
|
|
|
|
with gr.Accordion("Embeddings", open=False): |
|
gr.Label("To be implemented") |
|
|
|
|
|
with gr.Accordion("Image Options"): |
|
with gr.Tabs(): |
|
image_options = { |
|
"img2img": "Upload Image", |
|
"inpaint": "Upload Image", |
|
"canny": "Upload Image", |
|
"pose": "Upload Image", |
|
"depth": "Upload Image", |
|
} |
|
|
|
for image_option, label in image_options.items(): |
|
with gr.Tab(image_option): |
|
                                            if image_option not in ['inpaint', 'scribble']:
                                                globals()[f"{image_option}_image"] = gr.Image(label=label, type="pil")
                                            else:
|
globals()[f"{image_option}_image"] = gr.ImageEditor( |
|
label=label, |
|
image_mode='RGB', |
|
layers=False, |
|
brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed") if image_option == 'inpaint' else gr.Brush(), |
|
interactive=True, |
|
type="pil", |
|
) |
|
|
|
|
|
globals()[f"{image_option}_strength"] = gr.Slider(label="Strength", minimum=0, maximum=1, step=0.01, value=1.0, interactive=True) |
|
|
|
resize_mode = gr.Radio( |
|
label="Resize Mode", |
|
choices=["crop and resize", "resize only", "resize and fill"], |
|
value="resize and fill", |
|
interactive=True |
|
) |
|
|
|
|
|
with gr.Column(): |
|
with gr.Group(): |
|
output_images = gr.Gallery( |
|
label="Output Images", |
|
value=[], |
|
allow_preview=True, |
|
type="pil", |
|
interactive=False, |
|
) |
|
generate_images = gr.Button(value="Generate Images", variant="primary") |
|
|
|
with gr.Accordion("Advance Settings", open=True): |
|
with gr.Row(): |
|
scheduler = gr.Dropdown( |
|
label="Scheduler", |
|
choices = [ |
|
"fm_euler" |
|
], |
|
value="fm_euler", |
|
interactive=True |
|
) |
|
|
|
with gr.Row(): |
|
for column in range(2): |
|
with gr.Column(): |
|
options = [ |
|
("Height", "image_height", 64, 1024, 64, 1024, True), |
|
("Width", "image_width", 64, 1024, 64, 1024, True), |
|
("Num Images Per Prompt", "image_num_images_per_prompt", 1, 4, 1, 1, True), |
|
("Num Inference Steps", "image_num_inference_steps", 1, 100, 1, 20, True), |
|
("Clip Skip", "image_clip_skip", 0, 2, 1, 2, False), |
|
("Guidance Scale", "image_guidance_scale", 0, 20, 0.5, 3.5, True), |
|
("Seed", "image_seed", 0, 100000, 1, random.randint(0, 100000), True), |
|
] |
|
for label, var_name, min_val, max_val, step, value, visible in options[column::2]: |
|
globals()[var_name] = gr.Slider(label=label, minimum=min_val, maximum=max_val, step=step, value=value, visible=visible, interactive=True) |
|
|
|
with gr.Row(): |
|
refiner = gr.Checkbox( |
|
label="Refiner π§ͺ", |
|
value=False, |
|
) |
|
vae = gr.Checkbox( |
|
label="VAE", |
|
value=True, |
|
) |
|
|
|
|
|
|
|
|
|
fast_generation.change(update_fast_generation, [model, fast_generation], [image_guidance_scale, image_num_inference_steps]) |
|
|
|
|
|
|
|
lora_gallery.select(selected_lora_from_gallery, None, selected_lora) |
|
custom_lora.change(update_selected_lora, custom_lora, [custom_lora, selected_lora]) |
|
add_lora.click(add_to_enabled_loras, [model, selected_lora, enabled_loras], [selected_lora, custom_lora_info, enabled_loras]) |
|
enabled_loras.change(update_lora_sliders, enabled_loras, [lora_slider_0, lora_slider_1, lora_slider_2, lora_slider_3, lora_slider_4, lora_slider_5, lora_remove_0, lora_remove_1, lora_remove_2, lora_remove_3, lora_remove_4, lora_remove_5]) |
|
|
|
for i in range(6): |
|
globals()[f"lora_remove_{i}"].click( |
|
lambda enabled_loras, index=i: remove_from_enabled_loras(enabled_loras, index), |
|
[enabled_loras], |
|
[enabled_loras] |
|
) |
|
|
|
|
|
|
|
generate_images.click( |
|
generate_image, |
|
[ |
|
model, prompt, negative_prompt, fast_generation, enabled_loras, |
|
lora_slider_0, lora_slider_1, lora_slider_2, lora_slider_3, lora_slider_4, lora_slider_5, |
|
img2img_image, inpaint_image, canny_image, pose_image, depth_image, |
|
img2img_strength, inpaint_strength, canny_strength, pose_strength, depth_strength, |
|
resize_mode, |
|
scheduler, image_height, image_width, image_num_images_per_prompt, |
|
image_num_inference_steps, image_guidance_scale, image_seed, |
|
refiner, vae |
|
], |
|
[output_images] |
|
) |
|
with gr.Tab("SDXL"): |
|
gr.Label("To be implemented") |
|
with gr.Tab(label="π΅ Audio"): |
|
gr.Label("Coming soon!") |
|
with gr.Tab(label="π¬ Video"): |
|
gr.Label("Coming soon!") |
|
with gr.Tab(label="π Text"): |
|
gr.Label("Coming soon!") |
|
|
|
|
|
demo.launch( |
|
share=False, |
|
debug=True, |
|
) |
|
|