Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -90,12 +90,12 @@ class ModelWrapper:
|
|
90 |
raise NotImplementedError()
|
91 |
|
92 |
DTYPE = prompt_embed.dtype
|
93 |
-
print(DTYPE)
|
94 |
|
95 |
for constant in all_timesteps:
|
96 |
current_timesteps = torch.ones(len(prompt_embed), device="cuda", dtype=torch.long) * constant
|
97 |
-
current_timesteps = current_timesteps.to(torch.
|
98 |
-
print(current_timesteps.dtype)
|
99 |
eval_images = self.model(noise, current_timesteps, prompt_embed, added_cond_kwargs=unet_added_conditions).sample
|
100 |
print(type(eval_images))
|
101 |
|
@@ -123,7 +123,7 @@ class ModelWrapper:
|
|
123 |
|
124 |
add_time_ids = self.build_condition_input(height, width).repeat(num_images, 1)
|
125 |
|
126 |
-
noise = torch.randn(num_images, 4, height // self.vae_downsample_ratio, width // self.vae_downsample_ratio, generator=generator).to(device="cuda", dtype=torch.
|
127 |
|
128 |
prompt_inputs = self._encode_prompt(prompt)
|
129 |
|
@@ -142,9 +142,10 @@ class ModelWrapper:
|
|
142 |
}
|
143 |
|
144 |
|
145 |
-
print(noise.dtype)
|
146 |
-
print(batch_prompt_embeds.dtype)
|
147 |
-
|
|
|
148 |
|
149 |
eval_images = self.sample(noise=noise, unet_added_conditions=unet_added_conditions, prompt_embed=batch_prompt_embeds, fast_vae_decode=fast_vae_decode)
|
150 |
|
@@ -165,7 +166,7 @@ def get_x0_from_noise(sample, model_output, alphas_cumprod, timestep):
|
|
165 |
return pred_original_sample
|
166 |
|
167 |
class SDXLTextEncoder(torch.nn.Module):
|
168 |
-
def __init__(self, model_id, revision, accelerator, dtype=torch.
|
169 |
super().__init__()
|
170 |
|
171 |
self.text_encoder_one = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder", revision=revision).to(0).to(dtype=dtype)
|
|
|
90 |
raise NotImplementedError()
|
91 |
|
92 |
DTYPE = prompt_embed.dtype
|
93 |
+
print(f'prompt_embed: {DTYPE}')
|
94 |
|
95 |
for constant in all_timesteps:
|
96 |
current_timesteps = torch.ones(len(prompt_embed), device="cuda", dtype=torch.long) * constant
|
97 |
+
current_timesteps = current_timesteps.to(torch.float16)
|
98 |
+
print(f'current_timestpes: {current_timesteps.dtype}')
|
99 |
eval_images = self.model(noise, current_timesteps, prompt_embed, added_cond_kwargs=unet_added_conditions).sample
|
100 |
print(type(eval_images))
|
101 |
|
|
|
123 |
|
124 |
add_time_ids = self.build_condition_input(height, width).repeat(num_images, 1)
|
125 |
|
126 |
+
noise = torch.randn(num_images, 4, height // self.vae_downsample_ratio, width // self.vae_downsample_ratio, generator=generator).to(device="cuda", dtype=torch.float16)
|
127 |
|
128 |
prompt_inputs = self._encode_prompt(prompt)
|
129 |
|
|
|
142 |
}
|
143 |
|
144 |
|
145 |
+
print(f'noise: {noise.dtype}')
|
146 |
+
print(f'prompt: {batch_prompt_embeds.dtype}')
|
147 |
+
print(unet_added_conditions['time_ids'].dtype)
|
148 |
+
print(unet_added_conditions['text_embeds'].dtype)
|
149 |
|
150 |
eval_images = self.sample(noise=noise, unet_added_conditions=unet_added_conditions, prompt_embed=batch_prompt_embeds, fast_vae_decode=fast_vae_decode)
|
151 |
|
|
|
166 |
return pred_original_sample
|
167 |
|
168 |
class SDXLTextEncoder(torch.nn.Module):
|
169 |
+
def __init__(self, model_id, revision, accelerator, dtype=torch.float16):
|
170 |
super().__init__()
|
171 |
|
172 |
self.text_encoder_one = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder", revision=revision).to(0).to(dtype=dtype)
|