Spaces:

salomonsky
/

flux3

Running

App Files Community

vilarin committed on May 28, 2024

Commit

d290faa

verified ·

1 Parent(s): 9e4e479

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -90,12 +90,12 @@ class ModelWrapper:
             raise NotImplementedError()
         DTYPE = prompt_embed.dtype
-        print(DTYPE)
         for constant in all_timesteps:
             current_timesteps = torch.ones(len(prompt_embed), device="cuda", dtype=torch.long) * constant
-            current_timesteps = current_timesteps.to(torch.float32)
-            print(current_timesteps.dtype)
             eval_images = self.model(noise, current_timesteps, prompt_embed, added_cond_kwargs=unet_added_conditions).sample
             print(type(eval_images))
@@ -123,7 +123,7 @@ class ModelWrapper:
         add_time_ids = self.build_condition_input(height, width).repeat(num_images, 1)
-        noise = torch.randn(num_images, 4, height // self.vae_downsample_ratio, width // self.vae_downsample_ratio, generator=generator).to(device="cuda", dtype=torch.float32)
         prompt_inputs = self._encode_prompt(prompt)
@@ -142,9 +142,10 @@ class ModelWrapper:
         }
-        print(noise.dtype)
-        print(batch_prompt_embeds.dtype)
         eval_images = self.sample(noise=noise, unet_added_conditions=unet_added_conditions, prompt_embed=batch_prompt_embeds, fast_vae_decode=fast_vae_decode)
@@ -165,7 +166,7 @@ def get_x0_from_noise(sample, model_output, alphas_cumprod, timestep):
     return pred_original_sample
 class SDXLTextEncoder(torch.nn.Module):
-    def __init__(self, model_id, revision, accelerator, dtype=torch.float32):
         super().__init__()
         self.text_encoder_one = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder", revision=revision).to(0).to(dtype=dtype)

             raise NotImplementedError()
         DTYPE = prompt_embed.dtype
+        print(f'prompt_embed: {DTYPE}')
         for constant in all_timesteps:
             current_timesteps = torch.ones(len(prompt_embed), device="cuda", dtype=torch.long) * constant
+            current_timesteps = current_timesteps.to(torch.float16)
+            print(f'current_timestpes: {current_timesteps.dtype}')
             eval_images = self.model(noise, current_timesteps, prompt_embed, added_cond_kwargs=unet_added_conditions).sample
             print(type(eval_images))
         add_time_ids = self.build_condition_input(height, width).repeat(num_images, 1)
+        noise = torch.randn(num_images, 4, height // self.vae_downsample_ratio, width // self.vae_downsample_ratio, generator=generator).to(device="cuda", dtype=torch.float16)
         prompt_inputs = self._encode_prompt(prompt)
         }
+        print(f'noise: {noise.dtype}')
+        print(f'prompt: {batch_prompt_embeds.dtype}')
+        print(unet_added_conditions['time_ids'].dtype)
+        print(unet_added_conditions['text_embeds'].dtype)
         eval_images = self.sample(noise=noise, unet_added_conditions=unet_added_conditions, prompt_embed=batch_prompt_embeds, fast_vae_decode=fast_vae_decode)
     return pred_original_sample
 class SDXLTextEncoder(torch.nn.Module):
+    def __init__(self, model_id, revision, accelerator, dtype=torch.float16):
         super().__init__()
         self.text_encoder_one = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder", revision=revision).to(0).to(dtype=dtype)