adamelliotfields committed: Loader and inference improvements

Files changed:
- lib/inference.py  +58 -40
- lib/loader.py     +33 -32
lib/inference.py
CHANGED

@@ -21,8 +21,8 @@ from typing_extensions import ParamSpec
 
 from .loader import Loader
 
-__import__("transformers").logging.set_verbosity_error()
 __import__("warnings").filterwarnings("ignore", category=FutureWarning, module="transformers")
+__import__("transformers").logging.set_verbosity_error()
 
 T = TypeVar("T")
 P = ParamSpec("P")

@@ -45,17 +45,17 @@ async def async_call(fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T
     return await anyio.to_thread.run_sync(partial_fn)
 
 
-
-def parse_prompt(prompt: str) -> list[str]:
+def parse_prompt_with_arrays(prompt: str) -> list[str]:
     arrays = re.findall(r"\[\[(.*?)\]\]", prompt)
 
     if not arrays:
         return [prompt]
 
-    tokens = [item.split(",") for item in arrays]
-    combinations = list(product(*tokens))
-    prompts = []
+    tokens = [item.split(",") for item in arrays]  # [("a", "b"), ("1", "2")]
+    combinations = list(product(*tokens))  # [("a", "1"), ("a", "2"), ("b", "1"), ("b", "2")]
 
+    # find all the arrays in the prompt and replace them with tokens
+    prompts = []
     for combo in combinations:
         current_prompt = prompt
         for i, token in enumerate(combo):

@@ -71,8 +71,12 @@ def apply_style(prompt, style_id, negative=False):
     for style in STYLES:
         if style["id"] == style_id:
             if negative:
-                return
+                return (
+                    # prepend our negative prompt to the style's negative prompt
+                    f"{prompt}, {style['negative_prompt']}" if prompt else style["negative_prompt"]
+                )
             else:
+                # inject our positive prompt into the style prompt
                 return style["prompt"].format(prompt=prompt)
     return prompt
 

@@ -97,12 +101,18 @@ def prepare_image(input, size=None):
 
 
 def gpu_duration(**kwargs):
-
+    loading = 20
+    duration = 10
+    width = kwargs.get("width", 512)
+    height = kwargs.get("height", 512)
     scale = kwargs.get("scale", 1)
     num_images = kwargs.get("num_images", 1)
+    size = width * height
+    if size > 500_000:
+        duration += 5
     if scale == 4:
         duration += 5
-    return duration * num_images
+    return loading + (duration * num_images)
 
 
 @spaces.GPU(duration=gpu_duration)

@@ -116,7 +126,7 @@ def generate(
     style=None,
     seed=None,
     model="Lykon/dreamshaper-8",
-    scheduler="
+    scheduler="DDIM",
     width=512,
     height=512,
     guidance_scale=7.5,

@@ -140,16 +150,17 @@ def generate(
     if seed is None or seed < 0:
         seed = int(datetime.now().timestamp() * 1_000_000) % (2**64)
 
+    CURRENT_STEP = 0
+    CURRENT_IMAGE = 1
+
+    KIND = "img2img" if image_prompt is not None else "txt2img"
+
     EMBEDDINGS_TYPE = (
         ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NORMALIZED
         if clip_skip
         else ReturnedEmbeddingsType.LAST_HIDDEN_STATES_NORMALIZED
     )
 
-    KIND = "img2img" if image_prompt is not None else "txt2img"
-
-    CURRENT_IMAGE = 1
-
     if ip_image:
         IP_ADAPTER = "full-face" if ip_face else "plus"
     else:

@@ -162,23 +173,22 @@ def generate(
     TQDM = True
 
     def callback_on_step_end(pipeline, step, timestep, latents):
-        nonlocal CURRENT_IMAGE
+        nonlocal CURRENT_STEP, CURRENT_IMAGE
         if progress is None:
             return latents
         strength = denoising_strength if KIND == "img2img" else 1
         total_steps = min(int(inference_steps * strength), inference_steps)
-
+
+        CURRENT_STEP = step + 1
         progress(
-            (
+            (CURRENT_STEP, total_steps),
             desc=f"Generating image {CURRENT_IMAGE}/{num_images}",
         )
-        if current_step == total_steps:
-            CURRENT_IMAGE += 1
         return latents
 
     start = time.perf_counter()
     loader = Loader()
-
+    loader.load(
         KIND,
         IP_ADAPTER,
         model,

@@ -191,6 +201,17 @@ def generate(
         TQDM,
     )
 
+    if loader.pipe is None:
+        raise Error(f"RuntimeError: Error loading {model}")
+
+    pipe = loader.pipe
+    upscaler = None
+
+    if scale == 2:
+        upscaler = loader.upscaler_2x
+    if scale == 4:
+        upscaler = loader.upscaler_4x
+
     # load embeddings and append to negative prompt
     embeddings_dir = os.path.join(os.path.dirname(__file__), "..", "embeddings")
     embeddings_dir = os.path.abspath(embeddings_dir)

@@ -201,11 +222,8 @@ def generate(
                 pretrained_model_name_or_path=f"{embeddings_dir}/{embedding}.pt",
                 token=f"<{embedding}>",
             )
-            # boost embeddings slightly
             negative_prompt = (
-                f"{negative_prompt}, 
-                if negative_prompt
-                else f"(<{embedding}>)1.1"
+                f"{negative_prompt}, <{embedding}>" if negative_prompt else f"<{embedding}>"
             )
         except (EnvironmentError, HFValidationError, RepositoryNotFoundError):
             raise Error(f"Invalid embedding: <{embedding}>")

@@ -225,33 +243,33 @@ def generate(
 
     try:
         styled_negative_prompt = apply_style(negative_prompt, style, negative=True)
-
+        negative_embeds = compel(styled_negative_prompt)
     except PromptParser.ParsingException:
-        raise Error("
+        raise Error("ValueError: Invalid negative prompt")
 
     for i in range(num_images):
         # seeded generator for each iteration
         generator = torch.Generator(device=pipe.device).manual_seed(current_seed)
 
         try:
-            all_positive_prompts = 
+            all_positive_prompts = parse_prompt_with_arrays(positive_prompt)
             prompt_index = i % len(all_positive_prompts)
-
-
-
-
-            [
+            prompt = all_positive_prompts[prompt_index]
+            prompt = apply_style(prompt, style)
+            positive_embeds = compel(prompt)
+            positive_embeds, negative_embeds = compel.pad_conditioning_tensors_to_same_length(
+                [positive_embeds, negative_embeds]
             )
         except PromptParser.ParsingException:
-            raise Error("
+            raise Error("ValueError: Invalid prompt")
 
         kwargs = {
             "width": width,
             "height": height,
             "generator": generator,
-            "prompt_embeds": 
+            "prompt_embeds": positive_embeds,
             "guidance_scale": guidance_scale,
-            "negative_prompt_embeds": 
+            "negative_prompt_embeds": negative_embeds,
             "num_inference_steps": inference_steps,
             "output_type": "np" if scale > 1 else "pil",
         }

@@ -273,13 +291,13 @@ def generate(
             if scale > 1:
                 image = upscaler.predict(image)
             images.append((image, str(current_seed)))
+            current_seed += 1
+        except Exception as e:
+            raise Error(f"RuntimeError: {e}")
         finally:
             pipe.unload_textual_inversion()
-
-
-
-            # increment seed for next image
-            current_seed += 1
+            CURRENT_STEP = 0
+            CURRENT_IMAGE += 1
 
     diff = time.perf_counter() - start
     if Info:
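Note on the inference changes above: prompts can now contain [[a,b]]-style arrays that parse_prompt_with_arrays expands into every combination, and progress is reported per step through CURRENT_STEP/CURRENT_IMAGE. Below is a minimal, self-contained sketch of the array expansion; the hunk does not show the loop body that substitutes each combination back into the prompt, so the re.sub step here is an assumption rather than the Space's exact code.

import re
from itertools import product

def parse_prompt_with_arrays(prompt: str) -> list[str]:
    # find [[...]] groups and split each into its comma-separated tokens
    arrays = re.findall(r"\[\[(.*?)\]\]", prompt)
    if not arrays:
        return [prompt]
    tokens = [item.split(",") for item in arrays]   # [["a", "b"], ["1", "2"]]
    combinations = list(product(*tokens))           # [("a", "1"), ("a", "2"), ...]
    prompts = []
    for combo in combinations:
        current_prompt = prompt
        for token in combo:
            # assumed substitution: replace the next [[...]] with this combination's token
            current_prompt = re.sub(r"\[\[.*?\]\]", token.strip(), current_prompt, count=1)
        prompts.append(current_prompt)
    return prompts

print(parse_prompt_with_arrays("a [[red,blue]] car at [[dawn,dusk]]"))
# ['a red car at dawn', 'a red car at dusk', 'a blue car at dawn', 'a blue car at dusk']

For the new gpu_duration heuristic, a request for two 1024x1024 images at 4x upscale would reserve 20 + (10 + 5 + 5) * 2 = 60 seconds of ZeroGPU time: 1024 * 1024 = 1,048,576 exceeds 500,000 (+5), and scale == 4 adds another 5.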
lib/loader.py
CHANGED

@@ -27,13 +27,11 @@ class Loader:
             cls._instance = super().__new__(cls)
             cls._instance.pipe = None
             cls._instance.model = None
-            cls._instance.upscaler = None
             cls._instance.ip_adapter = None
+            cls._instance.upscaler_2x = None
+            cls._instance.upscaler_4x = None
         return cls._instance
 
-    def _should_unload_upscaler(self, scale=1):
-        return self.upscaler is not None and scale == 1
-
     def _should_unload_ip_adapter(self, ip_adapter=""):
         return self.ip_adapter is not None and not ip_adapter
 

@@ -78,25 +76,17 @@ class Loader:
         torch.cuda.reset_peak_memory_stats()
         torch.cuda.synchronize()
 
-    def _unload(self, kind="", model="", ip_adapter=""
+    def _unload(self, kind="", model="", ip_adapter=""):
         to_unload = []
-
-        if self._should_unload_upscaler(scale):
-            to_unload.append("upscaler")
-
         if self._should_unload_ip_adapter(ip_adapter):
             self._unload_ip_adapter()
             to_unload.append("ip_adapter")
-
         if self._should_unload_pipeline(kind, model):
             to_unload.append("model")
             to_unload.append("pipe")
-
         for component in to_unload:
             delattr(self, component)
-
         self._flush()
-
         for component in to_unload:
             setattr(self, component, None)
 

@@ -112,35 +102,46 @@ class Loader:
         self.pipe.set_ip_adapter_scale(0.5)
         self.ip_adapter = ip_adapter
 
-    def _load_upscaler(self, scale=1
-        if scale
-
-
-
+    def _load_upscaler(self, scale=1):
+        if scale == 2 and self.upscaler_2x is None:
+            try:
+                print("Loading 2x upscaler...")
+                self.upscaler_2x = RealESRGAN(2, "cuda")
+                self.upscaler_2x.load_weights()
+            except Exception as e:
+                print(f"Error loading 2x upscaler: {e}")
+                self.upscaler_2x = None
+        if scale == 4 and self.upscaler_4x is None:
+            try:
+                print("Loading 4x upscaler...")
+                self.upscaler_4x = RealESRGAN(4, "cuda")
+                self.upscaler_4x.load_weights()
+            except Exception as e:
+                print(f"Error loading 4x upscaler: {e}")
+                self.upscaler_4x = None
 
-    def _load_pipeline(self, kind, model, tqdm,
+    def _load_pipeline(self, kind, model, tqdm, **kwargs):
         pipeline = Config.PIPELINES[kind]
         if self.pipe is None:
-            print(f"Loading {model}...")
             try:
+                print(f"Loading {model}...")
+                self.model = model
                 if model.lower() in Config.MODEL_CHECKPOINTS.keys():
                     self.pipe = pipeline.from_single_file(
                         f"https://huggingface.co/{model}/{Config.MODEL_CHECKPOINTS[model.lower()]}",
                         **kwargs,
-                    ).to(
+                    ).to("cuda")
                 else:
-                    self.pipe = pipeline.from_pretrained(model, **kwargs).to(
-                self.model = model
+                    self.pipe = pipeline.from_pretrained(model, **kwargs).to("cuda")
             except Exception as e:
                 print(f"Error loading {model}: {e}")
                 self.model = None
                 self.pipe = None
                 return
-
         if not isinstance(self.pipe, pipeline):
-            self.pipe = pipeline.from_pipe(self.pipe).to(
-
-
+            self.pipe = pipeline.from_pipe(self.pipe).to("cuda")
+        if self.pipe is not None:
+            self.pipe.set_progress_bar_config(disable=not tqdm)
 
     def _load_vae(self, taesd=False, model=""):
         vae_type = type(self.pipe.vae)

@@ -251,14 +252,15 @@ class Loader:
             else torch.float16
         )
         else:
+            # defaults to float32
            pipe_kwargs["torch_dtype"] = torch.float16
 
-        self._unload(kind, model, ip_adapter
-        self._load_pipeline(kind, model, tqdm,
+        self._unload(kind, model, ip_adapter)
+        self._load_pipeline(kind, model, tqdm, **pipe_kwargs)
 
         # error loading model
         if self.pipe is None:
-            return
+            return
 
         same_scheduler = isinstance(self.pipe.scheduler, Config.SCHEDULERS[scheduler])
         same_karras = (

@@ -279,5 +281,4 @@ class Loader:
         self._load_vae(taesd, model)
         self._load_deepcache(deepcache)
         self._load_ip_adapter(ip_adapter)
-        self._load_upscaler(scale
-        return self.pipe, self.upscaler
+        self._load_upscaler(scale)
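The loader now caches one upscaler per scale on the singleton (upscaler_2x, upscaler_4x) and loads each RealESRGAN model lazily the first time its scale is requested, instead of keeping a single upscaler slot that was unloaded whenever scale dropped back to 1. A minimal sketch of just that caching and selection logic, with make_upscaler as a hypothetical stand-in so the example runs without the RealESRGAN dependency:

def make_upscaler(scale: int):
    # hypothetical placeholder for RealESRGAN(scale, "cuda") plus load_weights()
    return f"upscaler-{scale}x"

class UpscalerCache:
    def __init__(self):
        self.upscaler_2x = None
        self.upscaler_4x = None

    def load(self, scale: int):
        # load each model at most once and keep it on the instance
        if scale == 2 and self.upscaler_2x is None:
            self.upscaler_2x = make_upscaler(2)
        if scale == 4 and self.upscaler_4x is None:
            self.upscaler_4x = make_upscaler(4)

    def get(self, scale: int):
        self.load(scale)
        if scale == 2:
            return self.upscaler_2x
        if scale == 4:
            return self.upscaler_4x
        return None  # scale == 1 means no upscaling

cache = UpscalerCache()
assert cache.get(4) == "upscaler-4x"
assert cache.get(1) is None

Because both scales stay cached on the singleton, switching between 2x and 4x upscaling should no longer force a reload, and generate() now reads loader.upscaler_2x or loader.upscaler_4x based on scale instead of the old single loader.upscaler attribute.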