fix
- app-txt2imglora.py      +20 -20
- static/txt2imglora.html  +4 -30
app-txt2imglora.py
CHANGED
@@ -14,7 +14,7 @@ from fastapi.responses import (
 )
 
 from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderTiny
-from compel import Compel
+from compel import Compel
 import torch
 
 try:
@@ -35,11 +35,10 @@ MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
 TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
-WIDTH =
-HEIGHT =
-# disable tiny autoencoder for better quality speed tradeoff
-USE_TINY_AUTOENCODER = False
+WIDTH = 512
+HEIGHT = 512
 
 # check if MPS is available OSX only M1/M2/M3 chips
 mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
@@ -49,7 +48,7 @@ device = torch.device(
 )
 torch_device = device
 # change to torch.float16 to save GPU memory
-torch_dtype = torch.
+torch_dtype = torch.float
 
 print(f"TIMEOUT: {TIMEOUT}")
 print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
@@ -61,17 +60,15 @@ if mps_available:
     torch_device = "cpu"
     torch_dtype = torch.float32
 
-model_id = "
+model_id = "wavymulder/Analog-Diffusion"
+lcm_lora_id = "lcm-sd/lcm-sd1.5-lora"
 
 if SAFETY_CHECKER == "True":
     pipe = DiffusionPipeline.from_pretrained(model_id)
 else:
     pipe = DiffusionPipeline.from_pretrained(model_id, safety_checker=None)
-
-
-pipe.vae = AutoencoderTiny.from_pretrained(
-    "madebyollin/taesd", torch_dtype=torch_dtype, use_safetensors=True
-)
+
+
 pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
 pipe.set_progress_bar_config(disable=True)
 pipe.to(device=torch_device, dtype=torch_dtype).to(device)
@@ -86,15 +83,19 @@ if TORCH_COMPILE:
     pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
 
     pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
-
+
 # Load LCM LoRA
-pipe.load_lora_weights(
+pipe.load_lora_weights(
+    lcm_lora_id,
+    weight_name="lcm_sd_lora.safetensors",
+    adapter_name="lcm",
+    use_auth_token=HF_TOKEN,
+)
 
 compel_proc = Compel(
-    tokenizer=
-    text_encoder=
-
-    requires_pooled=[False, True],
+    tokenizer=pipe.tokenizer,
+    text_encoder=pipe.text_encoder,
+    truncate_long_prompts=False,
 )
 user_queue_map = {}
 
@@ -112,10 +113,9 @@ class InputParams(BaseModel):
 
 def predict(params: InputParams):
     generator = torch.manual_seed(params.seed)
-    prompt_embeds
+    prompt_embeds = compel_proc(params.prompt)
     results = pipe(
         prompt_embeds=prompt_embeds,
-        pooled_prompt_embeds=pooled_prompt_embeds,
         generator=generator,
         num_inference_steps=params.steps,
         guidance_scale=params.guidance_scale,
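Note: taken together, the changes above drop the tiny autoencoder, pin the output to 512x512, load the Analog Diffusion checkpoint with an LCM LoRA adapter, and build prompt embeddings with Compel. The following is a condensed sketch of how those pieces compose, not the full app (the FastAPI/websocket server, queue handling, and the InputParams model are omitted); the CUDA/float16 device choice and the width/height keyword arguments in generate() are assumptions for illustration, everything else mirrors the added lines.

# Condensed sketch of the txt2img-LoRA path set up above.
import os
import torch
from diffusers import DiffusionPipeline, LCMScheduler
from compel import Compel

HF_TOKEN = os.environ.get("HF_TOKEN", None)
WIDTH, HEIGHT = 512, 512

model_id = "wavymulder/Analog-Diffusion"
lcm_lora_id = "lcm-sd/lcm-sd1.5-lora"

pipe = DiffusionPipeline.from_pretrained(model_id, safety_checker=None)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.to(device="cuda", dtype=torch.float16)  # assumption: CUDA GPU, fp16

# LCM LoRA adapter, gated behind an HF token as in the diff
pipe.load_lora_weights(
    lcm_lora_id,
    weight_name="lcm_sd_lora.safetensors",
    adapter_name="lcm",
    use_auth_token=HF_TOKEN,
)

# Compel turns the raw prompt into embeddings so long/weighted prompts survive tokenization
compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=False,
)

def generate(prompt: str, seed: int, steps: int = 4, guidance_scale: float = 8.0):
    generator = torch.manual_seed(seed)
    prompt_embeds = compel_proc(prompt)
    return pipe(
        prompt_embeds=prompt_embeds,
        generator=generator,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        width=WIDTH,
        height=HEIGHT,
    ).images[0]

With the LCM scheduler plus LCM LoRA, a handful of inference steps (the UI defaults to 4) is usually enough, which is what keeps the MJPEG stream close to real time.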
static/txt2imglora.html
CHANGED
@@ -74,8 +74,7 @@
 }
 
 async function promptUpdateStream(e) {
-    const
-    const [WIDTH, HEIGHT] = JSON.parse(dimension);
+    const [WIDTH, HEIGHT] = [512, 512];
     websocket.send(JSON.stringify({
         "seed": getValue("#seed"),
         "prompt": getValue("#prompt"),
@@ -210,14 +209,8 @@
 using
 <a href="https://github.com/huggingface/diffusers/tree/main/examples/community#latent-consistency-pipeline"
     target="_blank" class="text-blue-500 underline hover:no-underline">Diffusers</a> with a MJPEG
-stream server.
-
-<p class="text-sm">
-    There are <span id="queue_size" class="font-bold">0</span> user(s) sharing the same GPU, affecting
-    real-time performance. Maximum queue size is 10. <a
-        href="https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model?duplicate=true"
-        target="_blank" class="text-blue-500 underline hover:no-underline">Duplicate</a> and run it on your
-    own GPU.
+stream server. Featuring <a href="https://huggingface.co/wavymulder/Analog-Diffusion" target="_blank"
+    class="text-blue-500 underline hover:no-underline">Analog Diffusion</a> Model.
 </p>
 </article>
 <div>
@@ -230,7 +223,7 @@
 <div class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center">
     <textarea type="text" id="prompt" class="font-light w-full px-3 py-2 mx-1 outline-none dark:text-black"
         title=" Start your session and type your prompt here, you can see the result in real-time."
-        placeholder="Add your prompt here...">
+        placeholder="Add your prompt here...">Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes</textarea>
 </div>
 
 </div>
@@ -238,25 +231,6 @@
 <details>
     <summary class="font-medium cursor-pointer">Advanced Options</summary>
     <form class="grid grid-cols-3 items-center gap-3 py-3" id="params" action="">
-        <label class="text-sm font-medium" for="dimension">Image Dimensions</label>
-        <div class="col-span-2 flex gap-2">
-            <div class="flex gap-1">
-                <input type="radio" id="dimension512" name="dimension" value="[512,512]"
-                    class="cursor-pointer">
-                <label for="dimension512" class="text-sm cursor-pointer">512x512</label>
-            </div>
-            <div class="flex gap-1">
-                <input type="radio" id="dimension768" name="dimension" value="[768,768]"
-                    lass="cursor-pointer">
-                <label for="dimension768" class="text-sm cursor-pointer">768x768</label>
-            </div>
-            <div class="flex gap-1">
-                <input type="radio" id="dimension1024" name="dimension" value="[1024,1024]" checked
-                    class="cursor-pointer">
-                <label for="dimension1024" class="text-sm cursor-pointer">1024x1024</label>
-            </div>
-        </div>
-        <!-- -->
     <label class="text-sm font-medium " for="steps">Inference Steps
     </label>
     <input type="range" id="steps" name="steps" min="1" max="20" value="4"