Real-Time-SD-Turbo

Sleeping

App Files Files Community

radames committed on Dec 18, 2023

Commit

4b58964

1 Parent(s): 592470d

using sfast go brrrr

Browse files

Files changed (6) hide show

app_init.py +1 -1
config.py +7 -0
pipelines/controlnelSD21Turbo.py +29 -11
pipelines/controlnetSDXLTurbo.py +42 -15
pipelines/img2imgSD21Turbo.py +16 -4
requirements.txt +2 -1

app_init.py CHANGED Viewed

@@ -110,11 +110,11 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
                     params = await user_data.get_latest_data(user_id)
                     if not vars(params) or params.__dict__ == last_params.__dict__:
                         await websocket.send_json({"status": "send_frame"})
-                        await asyncio.sleep(0.1)
                         continue
                     last_params = params
                     image = pipeline.predict(params)
                     if image is None:
                         await websocket.send_json({"status": "send_frame"})
                         continue

                     params = await user_data.get_latest_data(user_id)
                     if not vars(params) or params.__dict__ == last_params.__dict__:
                         await websocket.send_json({"status": "send_frame"})
                         continue
                     last_params = params
                     image = pipeline.predict(params)
                     if image is None:
                         await websocket.send_json({"status": "send_frame"})
                         continue

config.py CHANGED Viewed

@@ -16,6 +16,7 @@ class Args(NamedTuple):
     pipeline: str
     ssl_certfile: str
     ssl_keyfile: str
     compel: bool = False
     debug: bool = False
@@ -102,6 +103,12 @@ parser.add_argument(
     default=False,
     help="Compel",
 )
 parser.set_defaults(taesd=USE_TAESD)
 args = Args(**vars(parser.parse_args()))

     pipeline: str
     ssl_certfile: str
     ssl_keyfile: str
+    sfast: bool
     compel: bool = False
     debug: bool = False
     default=False,
     help="Compel",
 )
+parser.add_argument(
+    "--sfast",
+    action="store_true",
+    default=False,
+    help="Enable Stable Fast",
+)
 parser.set_defaults(taesd=USE_TAESD)
 args = Args(**vars(parser.parse_args()))

pipelines/controlnelSD21Turbo.py CHANGED Viewed

@@ -180,6 +180,19 @@ class Pipeline:
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
             ).to(device)
         self.canny_torch = SobelOperator(device=device)
         self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
@@ -188,14 +201,15 @@ class Pipeline:
         if device.type != "mps":
             self.pipe.unet.to(memory_format=torch.channels_last)
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
-        self.pipe.compel_proc = Compel(
-            tokenizer=self.pipe.tokenizer,
-            text_encoder=self.pipe.text_encoder,
-            truncate_long_prompts=True,
-        )
         if args.taesd:
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
@@ -216,7 +230,13 @@ class Pipeline:
     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
         generator = torch.manual_seed(params.seed)
-        prompt_embeds = self.pipe.compel_proc(params.prompt)
         control_image = self.canny_torch(
             params.image, params.canny_low_threshold, params.canny_high_threshold
         )
@@ -224,10 +244,10 @@ class Pipeline:
         strength = params.strength
         if int(steps * strength) < 1:
             steps = math.ceil(1 / max(0.10, strength))
-        last_time = time.time()
         results = self.pipe(
             image=params.image,
             control_image=control_image,
             prompt_embeds=prompt_embeds,
             generator=generator,
             strength=strength,
@@ -240,8 +260,6 @@ class Pipeline:
             control_guidance_start=params.controlnet_start,
             control_guidance_end=params.controlnet_end,
         )
-        print(f"Time taken: {time.time() - last_time}")
         nsfw_content_detected = (
             results.nsfw_content_detected[0]
             if "nsfw_content_detected" in results

             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
             ).to(device)
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
         self.canny_torch = SobelOperator(device=device)
         self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
         if device.type != "mps":
             self.pipe.unet.to(memory_format=torch.channels_last)
+        if args.compel:
+            from compel import Compel
+            self.pipe.compel_proc = Compel(
+                tokenizer=self.pipe.tokenizer,
+                text_encoder=self.pipe.text_encoder,
+                truncate_long_prompts=True,
+            )
         if args.taesd:
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
         generator = torch.manual_seed(params.seed)
+        prompt = params.prompt
+        prompt_embeds = None
+        if hasattr(self.pipe, "compel_proc"):
+            prompt_embeds = self.pipe.compel_proc(
+                [params.prompt, params.negative_prompt]
+            )
+            prompt = None
         control_image = self.canny_torch(
             params.image, params.canny_low_threshold, params.canny_high_threshold
         )
         strength = params.strength
         if int(steps * strength) < 1:
             steps = math.ceil(1 / max(0.10, strength))
         results = self.pipe(
             image=params.image,
             control_image=control_image,
+            prompt=prompt,
             prompt_embeds=prompt_embeds,
             generator=generator,
             strength=strength,
             control_guidance_start=params.controlnet_start,
             control_guidance_end=params.controlnet_end,
         )
         nsfw_content_detected = (
             results.nsfw_content_detected[0]
             if "nsfw_content_detected" in results

pipelines/controlnetSDXLTurbo.py CHANGED Viewed

@@ -185,20 +185,31 @@ class Pipeline:
             )
         self.canny_torch = SobelOperator(device=device)
         self.pipe.set_progress_bar_config(disable=True)
         self.pipe.to(device=device, dtype=torch_dtype).to(device)
         if device.type != "mps":
             self.pipe.unet.to(memory_format=torch.channels_last)
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
-        self.pipe.compel_proc = Compel(
-            tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
-            text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
-            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-            requires_pooled=[False, True],
-        )
         if args.taesd:
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
@@ -220,9 +231,23 @@ class Pipeline:
     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
         generator = torch.manual_seed(params.seed)
-        prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
-            [params.prompt, params.negative_prompt]
-        )
         control_image = self.canny_torch(
             params.image, params.canny_low_threshold, params.canny_high_threshold
         )
@@ -234,10 +259,12 @@ class Pipeline:
         results = self.pipe(
             image=params.image,
             control_image=control_image,
-            prompt_embeds=prompt_embeds[0:1],
-            pooled_prompt_embeds=pooled_prompt_embeds[0:1],
-            negative_prompt_embeds=prompt_embeds[1:2],
-            negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
             generator=generator,
             strength=strength,
             num_inference_steps=steps,

             )
         self.canny_torch = SobelOperator(device=device)
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
         self.pipe.set_progress_bar_config(disable=True)
         self.pipe.to(device=device, dtype=torch_dtype).to(device)
         if device.type != "mps":
             self.pipe.unet.to(memory_format=torch.channels_last)
+        if args.compel:
+            self.pipe.compel_proc = Compel(
+                tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
+                text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
+                returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+                requires_pooled=[False, True],
+            )
         if args.taesd:
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
         generator = torch.manual_seed(params.seed)
+        prompt = params.prompt
+        negative_prompt = params.negative_prompt
+        prompt_embeds = None
+        pooled_prompt_embeds = None
+        negative_prompt_embeds = None
+        negative_pooled_prompt_embeds = None
+        if hasattr(self.pipe, "compel_proc"):
+            _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
+                [params.prompt, params.negative_prompt]
+            )
+            prompt = None
+            negative_prompt = None
+            prompt_embeds = _prompt_embeds[0:1]
+            pooled_prompt_embeds = pooled_prompt_embeds[0:1]
+            negative_prompt_embeds = _prompt_embeds[1:2]
+            negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
         control_image = self.canny_torch(
             params.image, params.canny_low_threshold, params.canny_high_threshold
         )
         results = self.pipe(
             image=params.image,
             control_image=control_image,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            prompt_embeds=prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
             generator=generator,
             strength=strength,
             num_inference_steps=steps,

pipelines/img2imgSD21Turbo.py CHANGED Viewed

@@ -14,6 +14,10 @@ from config import Args
 from pydantic import BaseModel, Field
 from PIL import Image
 import math
 base_model = "stabilityai/sd-turbo"
 taesd_model = "madebyollin/taesd"
@@ -104,15 +108,23 @@ class Pipeline:
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
             ).to(device)
         self.pipe.set_progress_bar_config(disable=True)
         self.pipe.to(device=device, dtype=torch_dtype)
         if device.type != "mps":
             self.pipe.unet.to(memory_format=torch.channels_last)
-        # check if computer has less than 64GB of RAM using sys or os
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
         if args.torch_compile:
             print("Running torch compile")
             self.pipe.unet = torch.compile(

 from pydantic import BaseModel, Field
 from PIL import Image
 import math
+from sfast.compilers.stable_diffusion_pipeline_compiler import (
+    compile,
+    CompilationConfig,
+)
 base_model = "stabilityai/sd-turbo"
 taesd_model = "madebyollin/taesd"
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
             ).to(device)
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
         self.pipe.set_progress_bar_config(disable=True)
         self.pipe.to(device=device, dtype=torch_dtype)
         if device.type != "mps":
             self.pipe.unet.to(memory_format=torch.channels_last)
         if args.torch_compile:
             print("Running torch compile")
             self.pipe.unet = torch.compile(

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ compel==2.0.2
 controlnet-aux==0.0.7
 peft==0.6.0
 xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
-markdown2

 controlnet-aux==0.0.7
 peft==0.6.0
 xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
+markdown2
+stable_fast @ https://github.com/chengzeyi/stable-fast/releases/download/v0.0.15.post1/stable_fast-0.0.15.post1+torch211cu121-cp310-cp310-manylinux2014_x86_64.whl