frutiemax committed
Commit 21640c8
1 Parent(s): a4c8091

Revert to not using accelerate

Files changed (1)
  1. train_model.py +15 -28
train_model.py CHANGED
@@ -103,18 +103,9 @@ def train_model(batch_size=4, epochs=100, scheduler_num_timesteps=20, save_model
 
     # lets train for 100 epoch for each sprite in the dataset with a random noise level
     progress_bar = tqdm(total=epochs)
-    accelerator = Accelerator(
-        mixed_precision='fp16',
-        gradient_accumulation_steps=1,
-        log_with="tensorboard",
-        project_dir='logs',
-    )
 
     scheduler = DDPMScheduler(scheduler_num_timesteps)
-    unet, scheduler, optimizer, lr_scheduler = accelerator.prepare(unet, scheduler, \
-        optimizer, lr_scheduler)
-
-    unet = unet.to(dtype=torch.float16)
+    unet = unet.to(device='cuda', dtype=torch.float16)
     scheduler.set_timesteps(scheduler_num_timesteps)
 
     for epoch in range(epochs):
@@ -123,27 +114,23 @@ def train_model(batch_size=4, epochs=100, scheduler_num_timesteps=20, save_model
             progress_bar.set_description(f'epoch={epoch}, batch_index={batch_index}')
             batch_end = np.minimum(num_images, batch_index + batch_size)
             clean_images = targets[batch_index:batch_end]
-            clean_images = torch.reshape(clean_images, ((batch_end - batch_index), 12, 256, 256))
+            clean_images = torch.reshape(clean_images, ((batch_end - batch_index), 12, 256, 256)).to(device='cuda', dtype=torch.float16)
 
-            noise = torch.randn(clean_images.shape, dtype=torch.float16)
-            timesteps = torch.randint(0, scheduler.config.num_train_timesteps, (batch_end - batch_index, ))
+            noise = torch.randn(clean_images.shape, dtype=torch.float16, device='cuda')
+            timesteps = torch.randint(0, scheduler.config.num_train_timesteps, (batch_end - batch_index, )).to(device='cuda')
             #timesteps = timesteps.to(dtype=torch.int, device='cuda')
-            noisy_images = scheduler.add_noise(clean_images, noise, timesteps).to(device='cuda', dtype=torch.float16)
-
-            with accelerator.accumulate(unet):
-                noise_pred = unet(noisy_images, timesteps.to(device='cuda'), class_labels[batch_index:batch_end].to(device='cuda',dtype=torch.float16), return_dict=False)[0]
-
-                #noise_pred = noise_pred.to(device='cuda', dtype=torch.float16)
-                loss = F.mse_loss(noise_pred, noise.to('cuda', dtype=torch.float16))
-                accelerator.backward(loss)
-                accelerator.clip_grad_norm_(unet.parameters(), 1.0)
-
-                optimizer.step()
-                lr_scheduler.step()
-                optimizer.zero_grad()
+            noisy_images = scheduler.add_noise(clean_images, noise, timesteps)
+            noise_pred = unet(noisy_images, timesteps, class_labels[batch_index:batch_end].to(device='cuda',dtype=torch.float16), return_dict=False)[0]
+
+            #noise_pred = noise_pred.to(device='cuda', dtype=torch.float16)
+            loss = F.mse_loss(noise_pred, noise)
+            loss.backward()
+            optimizer.step()
+            lr_scheduler.step()
+            optimizer.zero_grad()
 
         if (epoch + 1) % save_model_interval == 0:
-            model.unet = accelerator.unwrap_model(unet)
+            model.unet = unet
             model.scheduler = scheduler
             save_and_test(model, epoch)
             del model.unet
@@ -152,4 +139,4 @@ def train_model(batch_size=4, epochs=100, scheduler_num_timesteps=20, save_model
 
 
 if __name__ == '__main__':
-    train_model(4)
+    train_model(8, save_model_interval=1)
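
Note: after this revert the UNet weights themselves are cast to float16 and the loss is backpropagated with no loss scaling. A common alternative when training without Accelerate is PyTorch's native automatic mixed precision, which keeps the weights in fp32 and scales the loss so fp16 gradients do not underflow. The sketch below is illustrative only and is not part of this commit; the Conv2d model, tensor shapes, and learning rate are placeholder stand-ins for the repo's UNet and sprite batches, and a CUDA device is assumed as in train_model.py.

import torch
import torch.nn.functional as F

# Hypothetical stand-ins: a small Conv2d and random tensors replace the repo's
# UNet and sprite batches so the pattern is runnable on its own (CUDA assumed).
model = torch.nn.Conv2d(12, 12, kernel_size=3, padding=1).to('cuda')
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()

clean_images = torch.randn(4, 12, 64, 64, device='cuda')
noise = torch.randn_like(clean_images)

# Weights stay in fp32; autocast runs the forward pass in fp16 where it is safe.
with torch.autocast(device_type='cuda', dtype=torch.float16):
    noise_pred = model(clean_images + noise)
    loss = F.mse_loss(noise_pred, noise)

scaler.scale(loss).backward()   # scale the loss so fp16 gradients do not underflow
scaler.step(optimizer)          # unscales gradients, then calls optimizer.step()
scaler.update()
optimizer.zero_grad()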