Spaces:

Doubiiu
/

DynamiCrafter

Running on Zero

Doubiiu committed on Feb 16, 2024

Commit

b5d93b2

verified ·

1 Parent(s): 7f31be9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -65,34 +65,34 @@ def infer(image, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123):
     noise_shape = [batch_size, channels, frames, h, w]
     # text cond
-    text_emb = model.get_learned_conditioning([prompt])
-    # img cond
-    img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
-    img_tensor = (img_tensor / 255. - 0.5) * 2
-    image_tensor_resized = transform(img_tensor) #3,256,256
-    videos = image_tensor_resized.unsqueeze(0) # bchw
-    z = get_latent_z(model, videos.unsqueeze(2)) #bc,1,hw
-    img_tensor_repeat = repeat(z, 'b c t h w -> b c (repeat t) h w', repeat=frames)
-    cond_images = model.embedder(img_tensor.unsqueeze(0)) ## blc
-    img_emb = model.image_proj_model(cond_images)
-    imtext_cond = torch.cat([text_emb, img_emb], dim=1)
-    fs = torch.tensor([fs], dtype=torch.long, device=model.device)
-    cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat]}
-    ## inference
-    with torch.no_grad(), torch.cuda.amp.autocast():
         batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale)
-    ## b,samples,c,t,h,w
-    video_path = './output.mp4'
-    save_videos(batch_samples, './', filenames=['output'], fps=save_fps)
     model = model.cpu()
     return video_path

     noise_shape = [batch_size, channels, frames, h, w]
     # text cond
+    with torch.no_grad(), torch.cuda.amp.autocast():
+        text_emb = model.get_learned_conditioning([prompt])
+        # img cond
+        img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
+        img_tensor = (img_tensor / 255. - 0.5) * 2
+        image_tensor_resized = transform(img_tensor) #3,256,256
+        videos = image_tensor_resized.unsqueeze(0) # bchw
+        z = get_latent_z(model, videos.unsqueeze(2)) #bc,1,hw
+        img_tensor_repeat = repeat(z, 'b c t h w -> b c (repeat t) h w', repeat=frames)
+        cond_images = model.embedder(img_tensor.unsqueeze(0)) ## blc
+        img_emb = model.image_proj_model(cond_images)
+        imtext_cond = torch.cat([text_emb, img_emb], dim=1)
+        fs = torch.tensor([fs], dtype=torch.long, device=model.device)
+        cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat]}
+        ## inference
         batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale)
+        ## b,samples,c,t,h,w
+        video_path = './output.mp4'
+        save_videos(batch_samples, './', filenames=['output'], fps=save_fps)
     model = model.cpu()
     return video_path