fastSD / app.py
thejagstudio's picture
Upload 61 files
510ee71 verified
raw
history blame
15.2 kB
import json
from argparse import ArgumentParser
import constants
from backend.controlnet import controlnet_settings_from_dict
from backend.models.gen_images import ImageFormat
from backend.models.lcmdiffusion_setting import DiffusionTask
from backend.upscale.tiled_upscale import generate_upscaled_image
from constants import APP_VERSION, DEVICE
from frontend.webui.image_variations_ui import generate_image_variations
from models.interface_types import InterfaceType
from paths import FastStableDiffusionPaths
from PIL import Image
from state import get_context, get_settings
from utils import show_system_info
from backend.device import get_device_name
# Command line parser; the description shown in --help embeds the app version.
parser = ArgumentParser(description=f"FAST SD CPU {constants.APP_VERSION}")

# Stand-alone switch, usable together with any of the start-up modes below.
parser.add_argument(
    "-s",
    "--share",
    help="Create sharable link(Web UI)",
    required=False,
    action="store_true",
)

# Start-up modes: at most one of these switches may be given per invocation.
group = parser.add_mutually_exclusive_group(required=False)
for short_flag, long_flag, description in (
    ("-g", "--gui", "Start desktop GUI"),
    ("-w", "--webui", "Start Web UI"),
    ("-a", "--api", "Start Web API server"),
    ("-r", "--realtime", "Start realtime inference UI(experimental)"),
    ("-v", "--version", "Version"),
):
    group.add_argument(
        short_flag,
        long_flag,
        action="store_true",
        help=description,
    )
def _add_flag(*names, text):
    """Register a boolean switch that is False unless present on the command line."""
    parser.add_argument(*names, action="store_true", help=text)


def _add_option(*names, kind, default, text):
    """Register an option that takes a value converted with ``kind``."""
    parser.add_argument(*names, type=kind, default=default, help=text)


# Options are registered in the order they should appear in --help.
_add_flag(
    "-b",
    "--benchmark",
    text="Run inference benchmark on the selected device",
)

# Model selection.
_add_option(
    "--lcm_model_id",
    kind=str,
    default="stabilityai/sd-turbo",
    text="Model ID or path,Default stabilityai/sd-turbo",
)
_add_option(
    "--openvino_lcm_model_id",
    kind=str,
    default="rupeshs/sd-turbo-openvino",
    text="OpenVINO Model ID or path,Default rupeshs/sd-turbo-openvino",
)

# Prompts.
_add_option(
    "--prompt",
    kind=str,
    default="",
    text="Describe the image you want to generate",
)
_add_option(
    "--negative_prompt",
    kind=str,
    default="",
    text="Describe what you want to exclude from the generation",
)

# Image geometry and sampling parameters.
_add_option("--image_height", kind=int, default=512, text="Height of the image")
_add_option("--image_width", kind=int, default=512, text="Width of the image")
_add_option(
    "--inference_steps",
    kind=int,
    default=1,
    text="Number of steps,default : 1",
)
_add_option(
    "--guidance_scale",
    kind=float,
    default=1.0,
    text="Guidance scale,default : 1.0",
)
_add_option(
    "--number_of_images",
    kind=int,
    default=1,
    text="Number of images to generate ,default : 1",
)
_add_option("--seed", kind=int, default=-1, text="Seed,default : -1 (disabled) ")

# Backend / pipeline switches.
_add_flag("--use_openvino", text="Use OpenVINO model")
_add_flag("--use_offline_model", text="Use offline model")
_add_flag("--use_safety_checker", text="Use safety checker")
_add_flag("--use_lcm_lora", text="Use LCM-LoRA")
_add_option(
    "--base_model_id",
    kind=str,
    default="Lykon/dreamshaper-8",
    text="LCM LoRA base model ID,Default Lykon/dreamshaper-8",
)
_add_option(
    "--lcm_lora_id",
    kind=str,
    default="latent-consistency/lcm-lora-sdv1-5",
    text="LCM LoRA model ID,Default latent-consistency/lcm-lora-sdv1-5",
)
_add_flag("-i", "--interactive", text="Interactive CLI mode")
_add_flag(
    "-t",
    "--use_tiny_auto_encoder",
    text="Use tiny auto encoder for SD (TAESD)",
)

# Image-to-image input and batching.
_add_option(
    "-f",
    "--file",
    kind=str,
    default="",
    text="Input image for img2img mode",
)
_add_flag("--img2img", text="img2img mode; requires input file via -f argument")
_add_option(
    "--batch_count",
    kind=int,
    default=1,
    text="Number of sequential generations",
)
_add_option(
    "--strength",
    kind=float,
    default=0.3,
    text="Denoising strength for img2img and Image variations",
)

# Upscaling.
_add_flag(
    "--sdupscale",
    text="Tiled SD upscale,works only for the resolution 512x512,(2x upscale)",
)
_add_flag("--upscale", text="EDSR SD upscale ")

# Custom settings file and output handling.
_add_option(
    "--custom_settings",
    kind=str,
    default=None,
    text="JSON file containing custom generation settings",
)
_add_flag("--usejpeg", text="Images will be saved as JPEG format")
_add_flag("--noimagesave", text="Disable image saving")
# Optional LoRA adapter. The help text is a raw string because the example
# Windows path contains backslashes ("\l", "\C") that are not valid escape
# sequences in a normal string literal; the displayed text is unchanged.
parser.add_argument(
    "--lora",
    type=str,
    help=r"LoRA model full path e.g D:\lora_models\CuteCartoon15V-LiberteRedmodModel-Cartoon-CuteCartoonAF.safetensors",
    default=None,
)
parser.add_argument(
    "--lora_weight",
    type=float,
    help="LoRA adapter weight [0 to 1.0]",
    default=0.5,
)

# Parse sys.argv once; everything below reads options from ``args``.
args = parser.parse_args()
# --version only prints the application version and stops. SystemExit is
# raised directly because the ``exit`` helper is installed by the ``site``
# module and is not guaranteed to exist in every runtime.
if args.version:
    print(APP_VERSION)
    raise SystemExit(0)

show_system_info()
print(f"Using device : {constants.DEVICE}")

# Settings are loaded the same way for every start-up mode.
app_settings = get_settings()

# Report how many model entries were found for each model list.
print(f"Found {len(app_settings.lcm_models)} LCM models in config/lcm-models.txt")
print(
    f"Found {len(app_settings.stable_diffsuion_models)} stable diffusion models in config/stable-diffusion-models.txt"
)
print(
    f"Found {len(app_settings.lcm_lora_models)} LCM-LoRA models in config/lcm-lora-models.txt"
)
print(
    f"Found {len(app_settings.openvino_lcm_models)} OpenVINO LCM models in config/openvino-lcm-models.txt"
)

# Images are saved unless --noimagesave was given.
app_settings.settings.generated_images.save_image = not args.noimagesave

if not args.realtime:
    # To minimize realtime mode dependencies
    from backend.upscale.upscaler import upscale_image
    from frontend.cli_interactive import interactive_mode
# Number of timed generations averaged for each benchmark configuration.
BENCHMARK_PASSES = 3


def _load_custom_settings(path):
    """Return the settings parsed from the JSON file at ``path``.

    An empty dict is returned when ``path`` is empty or ``None``.
    """
    if not path:
        return {}
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(path, encoding="utf-8") as settings_file:
        return json.load(settings_file)


def _configure_from_args(config, args):
    """Copy the parsed command line options onto ``config`` (modified in place).

    ``config`` is the application settings object and ``args`` the namespace
    produced by the argument parser.
    """
    lcm_setting = config.lcm_diffusion_setting

    # Only the model id of the selected backend is overridden.
    if args.use_openvino:
        lcm_setting.openvino_lcm_model_id = args.openvino_lcm_model_id
    else:
        lcm_setting.lcm_model_id = args.lcm_model_id

    lcm_setting.prompt = args.prompt
    lcm_setting.negative_prompt = args.negative_prompt
    lcm_setting.image_height = args.image_height
    lcm_setting.image_width = args.image_width
    lcm_setting.guidance_scale = args.guidance_scale
    lcm_setting.number_of_images = args.number_of_images
    lcm_setting.inference_steps = args.inference_steps
    lcm_setting.strength = args.strength
    lcm_setting.seed = args.seed
    lcm_setting.use_openvino = args.use_openvino
    lcm_setting.use_tiny_auto_encoder = args.use_tiny_auto_encoder
    lcm_setting.use_lcm_lora = args.use_lcm_lora
    lcm_setting.lcm_lora.base_model_id = args.base_model_id
    lcm_setting.lcm_lora.lcm_lora_id = args.lcm_lora_id
    lcm_setting.diffusion_task = DiffusionTask.text_to_image.value

    # LoRA is enabled only when a path was supplied with --lora.
    lcm_setting.lora.path = args.lora
    lcm_setting.lora.weight = args.lora_weight
    lcm_setting.lora.fuse = True
    lcm_setting.lora.enabled = bool(lcm_setting.lora.path)

    if args.usejpeg:
        config.generated_images.format = ImageFormat.JPEG.value.upper()

    # A seed below zero (the default is -1) leaves seeding disabled.
    lcm_setting.use_seed = args.seed > -1
    lcm_setting.use_offline_model = args.use_offline_model
    lcm_setting.use_safety_checker = args.use_safety_checker


def _mean_latency(context, config, passes):
    """Run ``passes`` text-to-image generations and return the mean latency.

    The latency of each run is read from ``context.latency`` after the call.
    """
    latencies = []
    for _ in range(passes):
        context.generate_text_to_image(
            settings=config,
            device=DEVICE,
        )
        latencies.append(context.latency)
    return sum(latencies) / passes


def _run_benchmark(context, config):
    """Benchmark text-to-image generation with and without TAESD and print a report.

    The prompt and the tiny auto encoder switch of
    ``config.lcm_diffusion_setting`` are overwritten by the benchmark.
    """
    print("Initializing benchmark...")
    bench_lcm_setting = config.lcm_diffusion_setting
    bench_lcm_setting.prompt = "a cat"

    # First configuration: tiny auto encoder disabled. One generation is run
    # before timing starts and its latency is not recorded.
    bench_lcm_setting.use_tiny_auto_encoder = False
    context.generate_text_to_image(
        settings=config,
        device=DEVICE,
    )
    print("Starting benchmark please wait...")
    avg_latency = _mean_latency(context, config, BENCHMARK_PASSES)

    # Second configuration: tiny auto encoder enabled, again with one
    # unrecorded generation before the timed passes.
    bench_lcm_setting.use_tiny_auto_encoder = True
    context.generate_text_to_image(
        settings=config,
        device=DEVICE,
    )
    avg_latency_taesd = _mean_latency(context, config, BENCHMARK_PASSES)

    benchmark_name = "OpenVINO" if bench_lcm_setting.use_openvino else "PyTorch"

    # Report the model id that matches the active pipeline.
    if bench_lcm_setting.use_openvino:
        bench_model_id = bench_lcm_setting.openvino_lcm_model_id
    elif bench_lcm_setting.use_lcm_lora:
        bench_model_id = bench_lcm_setting.lcm_lora.base_model_id
    else:
        bench_model_id = bench_lcm_setting.lcm_model_id

    benchmark_result = [
        ["Device", f"{DEVICE.upper()},{get_device_name()}"],
        ["Stable Diffusion Model", bench_model_id],
        [
            "Image Size ",
            f"{bench_lcm_setting.image_width}x{bench_lcm_setting.image_height}",
        ],
        ["Inference Steps", f"{bench_lcm_setting.inference_steps}"],
        ["Benchmark Passes", BENCHMARK_PASSES],
        ["Average Latency", f"{round(avg_latency,3)} sec"],
        ["Average Latency(TAESD* enabled)", f"{round(avg_latency_taesd,3)} sec"],
    ]

    separator = "-" * 80
    print()
    print(
        f" FastSD Benchmark - {benchmark_name:8} "
    )
    print(separator)
    for label, value in benchmark_result:
        print(f"{label:35} - {value}")
    print(separator)
    print("*TAESD - Tiny AutoEncoder for Stable Diffusion")


# Start the front end selected on the command line; with no mode switch the
# script runs as a command line image generator.
if args.gui:
    from frontend.gui.ui import start_gui

    print("Starting desktop GUI mode(Qt)")
    start_gui(
        [],
        app_settings,
    )
elif args.webui:
    from frontend.webui.ui import start_webui

    print("Starting web UI mode")
    start_webui(
        args.share,
    )
elif args.realtime:
    from frontend.webui.realtime_ui import start_realtime_text_to_image

    print("Starting realtime text to image(EXPERIMENTAL)")
    start_realtime_text_to_image(args.share)
elif args.api:
    from backend.api.web import start_web_server

    start_web_server()
else:
    context = get_context(InterfaceType.CLI)
    config = app_settings.settings
    _configure_from_args(config, args)

    # Read custom settings from JSON file
    custom_settings = _load_custom_settings(args.custom_settings)

    # Basic ControlNet settings; if ControlNet is enabled, an image is
    # required even in txt2img mode
    config.lcm_diffusion_setting.controlnet = None
    controlnet_settings_from_dict(
        config.lcm_diffusion_setting,
        custom_settings,
    )

    # Interactive mode
    if args.interactive:
        config.lcm_diffusion_setting.lora.fuse = False
        interactive_mode(config, context)
        # NOTE(review): once interactive_mode() returns, execution continues
        # with the non-interactive checks below - confirm this is intended.

    # Start of non-interactive CLI image generation: validate the inputs.
    # Failed validation exits with status 1 so callers can detect the error.
    if args.img2img:
        if args.file == "":
            print("Error : You need to specify a file in img2img mode")
            raise SystemExit(1)
        config.lcm_diffusion_setting.init_image = Image.open(args.file)
        config.lcm_diffusion_setting.diffusion_task = DiffusionTask.image_to_image.value
    elif args.upscale and args.file == "" and args.custom_settings is None:
        # NOTE(review): the message mentions SD upscale but the condition
        # tests --upscale, not --sdupscale - confirm which one is meant.
        print("Error : You need to specify a file in SD upscale mode")
        raise SystemExit(1)
    elif (
        args.prompt == ""
        and args.file == ""
        and args.custom_settings is None
        and not args.benchmark
    ):
        print("Error : You need to provide a prompt")
        raise SystemExit(1)

    if args.upscale:
        # 2x upscale of the input file.
        output_path = FastStableDiffusionPaths.get_upscale_filepath(
            args.file,
            2,
            config.generated_images.format,
        )
        upscale_image(
            context,
            args.file,
            output_path,
            2,
        )
    # Perform Tiled SD upscale (EXPERIMENTAL)
    elif args.sdupscale:
        if args.use_openvino:
            config.lcm_diffusion_setting.strength = 0.3
        # Custom settings, when provided, also act as the upscale settings
        # and supply the source file and output format.
        upscale_settings = custom_settings if custom_settings != {} else None
        filepath = args.file
        output_format = config.generated_images.format
        if upscale_settings:
            filepath = upscale_settings["source_file"]
            output_format = upscale_settings["output_format"].upper()
        output_path = FastStableDiffusionPaths.get_upscale_filepath(
            filepath,
            2,
            output_format,
        )
        generate_upscaled_image(
            config,
            filepath,
            config.lcm_diffusion_setting.strength,
            upscale_settings=upscale_settings,
            context=context,
            tile_overlap=32 if config.lcm_diffusion_setting.use_openvino else 16,
            output_path=output_path,
            image_format=output_format,
        )
        raise SystemExit(0)
    # If img2img argument is set and prompt is empty, use image variations mode
    elif args.img2img and args.prompt == "":
        for _ in range(args.batch_count):
            generate_image_variations(
                config.lcm_diffusion_setting.init_image, args.strength
            )
    elif args.benchmark:
        _run_benchmark(context, config)
    else:
        for _ in range(args.batch_count):
            context.generate_text_to_image(
                settings=config,
                device=DEVICE,
            )