frutiemax
/

rct_model

Model card Files Files and versions Community

frutiemax committed on Sep 23, 2023

Commit

4f25fc2

1 Parent(s): 2104644

Fix pipeline for new latent and sample sizes

Browse files

Files changed (1) hide show

rct_diffusion_pipeline.py +8 -7

rct_diffusion_pipeline.py CHANGED Viewed

@@ -12,7 +12,7 @@ import pandas as pd
 from tqdm.auto import tqdm
 class RCTDiffusionPipeline(DiffusionPipeline):
-    def __init__(self, unet, scheduler, vae):
         super().__init__()
         # dictionary that keeps the different classes of object description, color1, color2 and color3
@@ -24,6 +24,8 @@ class RCTDiffusionPipeline(DiffusionPipeline):
         self.scheduler = scheduler
         self.unet = unet
         self.vae = vae
         # channels for 1 image
         self.num_channels = int(self.unet.config.in_channels / 4)
@@ -164,15 +166,14 @@ class RCTDiffusionPipeline(DiffusionPipeline):
         # now put those weights into a tensor
         return self.pack_labels_to_tensor(batch_size, object_descriptions, colors1, colors2, colors3).to(device='cuda',dtype=torch.float16)
-    # generate 64x64 latents
     def generate_noise_batches(self, batch_size):
-        noise_batches = torch.Tensor(size=(batch_size, 4, self.num_channels, 64, 64)).to(dtype=torch.float16, device='cuda')
         for batch_index in range(batch_size):
             for view_index in range(4):
-                noise = torch.randn(self.num_channels, 64, 64).to(dtype=torch.float16, device='cuda')
                 noise_batches[batch_index, view_index] = noise
-        return torch.reshape(noise_batches, (batch_size, 1, self.num_channels*4, 64, 64)).to(dtype=torch.float16, device='cuda')
     def __call__(self, object_description : list[list[tuple[str, float]]], color1 : list[list[tuple[str, float]]], \
                 color2 : list[list[tuple[str, float]]] = None, color3 : list[list[tuple[str, float]]] = None, \
@@ -201,8 +202,8 @@ class RCTDiffusionPipeline(DiffusionPipeline):
             epoch = epoch + 1
         # reshape the data so we get back 4 RGB images
-        noise_batches = torch.reshape(noise_batches, (batch_size, 4, self.num_channels, 64, 64))
-        images = torch.Tensor(size=(batch_size, 4, 3, 512, 512))
         with torch.no_grad():
             for image_index in range(4):

 from tqdm.auto import tqdm
 class RCTDiffusionPipeline(DiffusionPipeline):
+    def __init__(self, unet, scheduler, vae, latent_size=32, sample_size=256):
         super().__init__()
         # dictionary that keeps the different classes of object description, color1, color2 and color3
         self.scheduler = scheduler
         self.unet = unet
         self.vae = vae
+        self.latent_size = latent_size
+        self.sample_size = sample_size
         # channels for 1 image
         self.num_channels = int(self.unet.config.in_channels / 4)
         # now put those weights into a tensor
         return self.pack_labels_to_tensor(batch_size, object_descriptions, colors1, colors2, colors3).to(device='cuda',dtype=torch.float16)
     def generate_noise_batches(self, batch_size):
+        noise_batches = torch.Tensor(size=(batch_size, 4, self.num_channels, self.latent_size, self.latent_size)).to(dtype=torch.float16, device='cuda')
         for batch_index in range(batch_size):
             for view_index in range(4):
+                noise = torch.randn(self.num_channels, self.latent_size, self.latent_size).to(dtype=torch.float16, device='cuda')
                 noise_batches[batch_index, view_index] = noise
+        return torch.reshape(noise_batches, (batch_size, 1, self.num_channels*4, self.latent_size, self.latent_size)).to(dtype=torch.float16, device='cuda')
     def __call__(self, object_description : list[list[tuple[str, float]]], color1 : list[list[tuple[str, float]]], \
                 color2 : list[list[tuple[str, float]]] = None, color3 : list[list[tuple[str, float]]] = None, \
             epoch = epoch + 1
         # reshape the data so we get back 4 RGB images
+        noise_batches = torch.reshape(noise_batches, (batch_size, 4, self.num_channels, self.latent_size, self.latent_size))
+        images = torch.Tensor(size=(batch_size, 4, 3, self.sample_size, self.sample_size))
         with torch.no_grad():
             for image_index in range(4):