frutiemax committed
Commit 2104644
1 Parent(s): 054faf7

Switch to stabilityai/sd-vae-ft-mse

Files changed (1):
1. train_model.py +7 -3
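In short: the commit swaps the fp16 SD 1.5 VAE for stabilityai/sd-vae-ft-mse and halves the sample and latent resolution. Below is a minimal sketch of the new load, assuming only the model ID from the diff and the standard 8x spatial downsampling of the Stable Diffusion autoencoder, which is what keeps SAMPLE_SIZE = 256 consistent with LATENT_SIZE = 32:

```python
# Sketch of the swapped-in VAE load from the diff below; run in fp32 on CPU
# for portability (the training script itself casts to fp16 for CUDA).
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", use_safetensors=True)

# The SD autoencoder downsamples 8x spatially and yields 4 latent channels,
# hence SAMPLE_SIZE = 256 pairs with LATENT_SIZE = 32 in the diff.
with torch.no_grad():
    latents = vae.encode(torch.randn(1, 3, 256, 256)).latent_dist.sample()
print(latents.shape)  # torch.Size([1, 4, 32, 32])
```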
train_model.py CHANGED

@@ -12,8 +12,8 @@ from tqdm.auto import tqdm
 from accelerate import Accelerator
 from diffusers import DDPMScheduler, UNet2DConditionModel, AutoencoderKL
 
-SAMPLE_SIZE = 512
-LATENT_SIZE = 64
+SAMPLE_SIZE = 256
+LATENT_SIZE = 32
 SAMPLE_NUM_CHANNELS = 3
 LATENT_NUM_CHANNELS = 4
 
@@ -109,7 +109,7 @@ def train_model(batch_size=4, epochs=100, scheduler_num_timesteps=20, save_model
         block_out_channels=(64, 128, 256), norm_num_groups=32)
     unet = unet.to(dtype=torch.float16)
     scheduler = DDPMScheduler(num_train_timesteps=20)
-    vae = AutoencoderKL.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="vae", use_safetensors=True, variant='fp16')
+    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", use_safetensors=True)
     vae = vae.to(dtype=torch.float16)
 
     optimizer = torch.optim.Adam(unet.parameters(), lr=start_learning_rate)
@@ -121,6 +121,7 @@ def train_model(batch_size=4, epochs=100, scheduler_num_timesteps=20, save_model
     model = RCTDiffusionPipeline(unet, scheduler, vae)
     model.load_dictionaries_from_dataset()
     labels = convert_labels(dataset, model, num_images)
+    del model
 
     # lets train for 100 epoch for each sprite in the dataset with a random noise level
     progress_bar = tqdm(total=epochs)
@@ -139,6 +140,7 @@ def train_model(batch_size=4, epochs=100, scheduler_num_timesteps=20, save_model
         timesteps = torch.randint(0, scheduler.config.num_train_timesteps, (batch_end - batch_index, )).to(device='cuda')
         #timesteps = timesteps.to(dtype=torch.int, device='cuda')
         noisy_images = scheduler.add_noise(clean_images, noise, timesteps)
+        del clean_images
 
         # encode through the vae
         with accelerator.accumulate(unet):
@@ -153,6 +155,8 @@ def train_model(batch_size=4, epochs=100, scheduler_num_timesteps=20, save_model
             result = vae.encode(images).latent_dist.sample()
             latent_noises[:, view_index*LATENT_NUM_CHANNELS:(view_index+1)*LATENT_NUM_CHANNELS] = result
 
+            del noise
+            del noisy_images
             unet_results = unet(latent_images, timesteps, labels[batch_index:batch_end])[0]
             unet_results = unet_results.to(dtype=torch.float16)
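A side note on the added del statements (an interpretation, not part of the commit): dropping Python references to large per-batch tensors as soon as they are no longer needed lets PyTorch's CUDA caching allocator reuse that memory before the UNet forward pass allocates its activations. A hypothetical minimal illustration of the pattern; tensor names and sizes here are made up, not from train_model.py:

```python
# Early-free pattern for large per-iteration tensors.
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

for _ in range(3):
    clean_images = torch.randn(4, 3, 256, 256, device=device)
    noise = torch.randn_like(clean_images)
    noisy_images = clean_images + noise   # stand-in for scheduler.add_noise
    del clean_images                      # drop each reference once the tensor is
    result = noisy_images.mean()          # no longer needed, so the CUDA caching
    del noise, noisy_images               # allocator can reuse those blocks
```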