Spaces:
Running
Running
import gradio as gr | |
import spaces | |
import torch | |
from PIL import Image | |
from compel import Compel, DiffusersTextualInversionManager | |
from diffusers import DiffusionPipeline, StableDiffusionPipeline | |
from diffusers.utils import make_image_grid | |
from src.const import DIFFUSERS_MODEL_IDS, EXTERNAL_MODEL_MAPPING, DEVICE | |
def load_pipeline(model_id, use_model_offload, safety_checker):
    """Create a float16 diffusion pipeline for ``model_id`` and place it for inference.

    Args:
        model_id: Either an entry of ``DIFFUSERS_MODEL_IDS`` (loaded as-is) or a
            key of ``EXTERNAL_MODEL_MAPPING`` (loaded from the mapped location).
        use_model_offload: When truthy, ``enable_model_cpu_offload()`` is called
            instead of moving the pipeline to ``DEVICE``.
        safety_checker: When falsy, the pipeline's ``safety_checker`` attribute
            is set to ``None``.

    Returns:
        The configured pipeline object.

    Raises:
        KeyError: If ``model_id`` is in neither ``DIFFUSERS_MODEL_IDS`` nor
            ``EXTERNAL_MODEL_MAPPING``.
    """
    # Models hosted in the Diffusers repository are loaded by their own id;
    # CIVITAI-derived models are resolved through EXTERNAL_MODEL_MAPPING.
    is_diffusers_model = model_id in DIFFUSERS_MODEL_IDS
    source = model_id if is_diffusers_model else EXTERNAL_MODEL_MAPPING[model_id]
    pipe = DiffusionPipeline.from_pretrained(source, torch_dtype=torch.float16)

    # NOTE(review): the original indentation was lost; the embedding/LoRA
    # loading below is assumed to belong to the external-model branch only,
    # since the assets are named for SD1.5 -- confirm against the real file.
    if not is_diffusers_model:
        # Load Textual Inversion
        textual_inversions = (
            ("checkpoints/embeddings/BadNegAnatomyV1 neg.pt", 'BadNegAnatomyV1-neg'),
            ("checkpoints/embeddings/Deep Negative V1 75T.pt", 'DeepNegative'),
            ("checkpoints/embeddings/easynegative.safetensors", 'EasyNegative'),
            ("checkpoints/embeddings/Negative Hand Embedding.pt", 'negative_hand-neg'),
        )
        for embedding_path, embedding_token in textual_inversions:
            pipe.load_textual_inversion(embedding_path, token=embedding_token)

        # Load LoRA
        lora_adapters = (
            ("checkpoints/lora/detailed style SD1.5.safetensors", 'detail'),
            ("checkpoints/lora/perfection style SD1.5.safetensors", 'perfection'),
            ("checkpoints/lora/Hand v3 SD1.5.safetensors", 'hands'),
        )
        for lora_path, lora_name in lora_adapters:
            pipe.load_lora_weights(lora_path, adapter_name=lora_name)
        # Only 'detail' and 'hands' are activated; 'perfection' stays loaded
        # but is not part of the active adapter set.
        pipe.set_adapters(['detail', 'hands'], adapter_weights=[0.5, 0.5])

    # Workaround for machines with little VRAM
    if use_model_offload:
        pipe.enable_model_cpu_offload()
    else:
        pipe = pipe.to(DEVICE)

    if not safety_checker:
        pipe.safety_checker = None

    return pipe
def inference(
    prompt: str,
    model_id: str = "stabilityai/stable-diffusion-3-medium-diffusers",
    negative_prompt: str = "",
    width: int = 512,
    height: int = 512,
    guidance_scale: float = 7.5,
    num_inference_steps: int = 50,
    num_images: int = 4,
    safety_checker: bool = True,
    use_model_offload: bool = False,
    seed: int = 8888,
    progress=gr.Progress(track_tqdm=True),
) -> Image.Image:
    """Generate ``num_images`` images for a prompt and return them as one grid.

    The pipeline is loaded through ``load_pipeline`` on every call. For
    ``StableDiffusionPipeline`` instances the prompts are converted to
    embeddings with Compel; every other pipeline receives the raw prompt
    strings.

    Args:
        prompt: Text prompt describing the desired image.
        model_id: Model identifier forwarded to ``load_pipeline``.
        negative_prompt: Text describing what to avoid.
        width: Width passed to the pipeline.
        height: Height passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of inference steps passed to the pipeline.
        num_images: Number of images to generate for the prompt (at least 1).
        safety_checker: Forwarded to ``load_pipeline``.
        use_model_offload: Forwarded to ``load_pipeline``.
        seed: Seed for the ``torch.Generator`` handed to the pipeline.
        progress: Gradio progress tracker used to report status.

    Returns:
        A single image containing all generated images: one column of
        ``num_images`` rows when the count is odd, otherwise two rows of
        ``num_images // 2`` columns.

    Raises:
        ValueError: If ``num_images`` is smaller than 1.
    """
    # Fail early with a clear message instead of an opaque error from the
    # pipeline or the grid helper after the model has already been loaded.
    if num_images < 1:
        raise ValueError(f"num_images must be at least 1, got {num_images}")

    progress(0, 'Loading pipeline...')
    pipe = load_pipeline(model_id, use_model_offload, safety_checker)

    # Fixed seed
    generator = torch.Generator(device=DEVICE).manual_seed(seed)

    # Only the way the prompt is supplied differs between pipeline types;
    # all remaining arguments are shared by a single call below.
    if isinstance(pipe, StableDiffusionPipeline):
        prompt_kwargs = _encode_prompts_with_compel(pipe, prompt, negative_prompt)
    else:
        prompt_kwargs = {"prompt": prompt, "negative_prompt": negative_prompt}

    progress(0.3, 'Generating images...')
    images = pipe(
        **prompt_kwargs,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=num_images,
        generator=generator,
    ).images
    progress(0.9, f'Done generating {num_images} images')

    # Odd counts cannot be split into two equal rows, so stack them vertically.
    if num_images % 2 == 1:
        rows, cols = num_images, 1
    else:
        rows, cols = 2, num_images // 2
    return make_image_grid(images, rows=rows, cols=cols)


def _encode_prompts_with_compel(pipe, prompt, negative_prompt):
    """Return Compel prompt embeddings as keyword arguments for ``pipe``."""
    compel_procs = Compel(
        tokenizer=pipe.tokenizer,
        text_encoder=pipe.text_encoder,
        textual_inversion_manager=DiffusersTextualInversionManager(pipe),
        truncate_long_prompts=False,
    )
    # Encoding is inference-only here, so autograd tracking is switched off.
    with torch.no_grad():
        prompt_embed = compel_procs(prompt)
        negative_prompt_embed = compel_procs(negative_prompt)
    # Truncation is disabled above, so the two embeddings are padded to a
    # common length before being passed to the pipeline together.
    prompt_embed, negative_prompt_embed = compel_procs.pad_conditioning_tensors_to_same_length(
        [prompt_embed, negative_prompt_embed]
    )
    return {
        "prompt_embeds": prompt_embed,
        "negative_prompt_embeds": negative_prompt_embed,
    }