Spaces:

tight-inversion
/

tight-inversion-pulid-demo

Running on Zero

App Files Files Community

tight-inversion committed 4 days ago

Commit

10d3d92

1 Parent(s): 577e44b

Fix encoding

Browse files

Files changed (1) hide show

app.py +26 -44

app.py CHANGED Viewed

@@ -28,7 +28,6 @@ def get_models(name: str, device: torch.device, offload: bool, fp8: bool):
     ae = load_ae(name, device=device)
     return model, ae, t5, clip
 class FluxGenerator:
     def __init__(self, model_name: str, device: str, offload: bool, aggressive_offload: bool, args):
         self.device = torch.device(device)
@@ -44,47 +43,7 @@ class FluxGenerator:
         self.pulid_model = PuLIDPipeline(self.model, device='cuda', weight_dtype=torch.bfloat16)
         self.pulid_model.load_pretrain(args.pretrained_model)
-    # function to encode an image into latents
-    def encode_image_to_latents(self, img, opts):
-        """
-        Opposite of decode: Takes a PIL image and encodes it into latents (x).
-        """
-        t0 = time.perf_counter()
-        # Resize if necessary, or use opts.height / opts.width if you want a fixed size:
-        img = img.resize((opts.width, opts.height), resample=Image.LANCZOS)
-        # Convert image to torch.Tensor and scale to [-1, 1]
-        # Image is in [0, 255] → scale to [0,1] → then map to [-1,1].
-        x = np.array(img).astype(np.float32)
-        x = torch.from_numpy(x)  # shape: (H, W, C)
-        x = (x / 127.5) - 1.0    # now in [-1, 1]
-        x = rearrange(x, "h w c -> 1 c h w")  # shape: (1, C, H, W)
-        # Move encoder to device if you are offloading
-        if self.offload:
-            self.ae.encoder.to(self.device)
-        x = x.to(self.device)
-        # 2) Encode with autocast
-        with torch.autocast(device_type=self.device.type, dtype=torch.bfloat16):
-            x = self.ae.encode(x)
-        x = x.to(torch.bfloat16)
-        # 3) Offload if needed
-        if self.offload:
-            self.ae.encoder.cpu()
-            torch.cuda.empty_cache()
-        t1 = time.perf_counter()
-        print(f"Encoded in {t1 - t0:.2f} seconds.")
-        return x
-    @spaces.GPU(duration=90)
     @torch.inference_mode()
     def generate_image(
         self,
@@ -153,8 +112,31 @@ class FluxGenerator:
         noise = rearrange(noise, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
         if noise.shape[0] == 1 and bs > 1:
             noise = repeat(noise, "1 ... -> bs ...", bs=bs)
-        # encode
-        x = self.encode_image_to_latents(id_image, opts)
         timesteps = get_schedule(opts.num_steps, x.shape[-1] * x.shape[-2] // 4, shift=False)

     ae = load_ae(name, device=device)
     return model, ae, t5, clip
 class FluxGenerator:
     def __init__(self, model_name: str, device: str, offload: bool, aggressive_offload: bool, args):
         self.device = torch.device(device)
         self.pulid_model = PuLIDPipeline(self.model, device='cuda', weight_dtype=torch.bfloat16)
         self.pulid_model.load_pretrain(args.pretrained_model)
+    @spaces.GPU(duration=60)
     @torch.inference_mode()
     def generate_image(
         self,
         noise = rearrange(noise, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
         if noise.shape[0] == 1 and bs > 1:
             noise = repeat(noise, "1 ... -> bs ...", bs=bs)
+        # Encode id_image directly here
+        encode_t0 = time.perf_counter()
+        # Resize image
+        id_image = id_image.resize((opts.width, opts.height), resample=Image.LANCZOS)
+        # Convert image to torch.Tensor and scale to [-1, 1]
+        x = np.array(id_image).astype(np.float32)
+        x = torch.from_numpy(x)  # shape: (H, W, C)
+        x = (x / 127.5) - 1.0    # now in [-1, 1]
+        x = rearrange(x, "h w c -> 1 c h w")  # shape: (1, C, H, W)
+        x = x.to(self.device)
+        # Encode with autocast
+        with torch.autocast(device_type=self.device.type, dtype=torch.bfloat16):
+            x = self.ae.encode(x)
+        x = x.to(torch.bfloat16)
+        # Offload if needed
+        if self.offload:
+            self.ae.encoder.to("cpu")
+            torch.cuda.empty_cache()
+        encode_t1 = time.perf_counter()
+        print(f"Encoded in {encode_t1 - encode_t0:.2f} seconds.")
         timesteps = get_schedule(opts.num_steps, x.shape[-1] * x.shape[-2] // 4, shift=False)