Update gradio_app.py
gradio_app.py  CHANGED  (+141 -140)
@@ -132,152 +132,153 @@ def untranspose(tensor):
 def get_image(image1, prompt, image2, dim_steps=50, ddim_eta=1., fs=None, seed=123, \
               unconditional_guidance_scale=1.0, cfg_img=None, text_input=False, multiple_cond_cfg=False, \
               loop=False, interp=False, timestep_spacing='uniform', guidance_rescale=0.0, noise_shape=[72, 108], n_samples=1, **kwargs):
-
-    seed_everything(seed)
-    video_size = (576, 1024)
-    transform = transforms.Compose([
-        transforms.Resize(min(video_size)),
-        transforms.CenterCrop(video_size),
-        # transforms.ToTensor(),
-        # transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
-        ])
-
-    image1 = torch.from_numpy(image1).permute(2, 0, 1).float().cuda()
-    input_h, input_w = image1.shape[1:]
-    image1 = (image1 / 255. - 0.5) * 2
-
-    image2 = torch.from_numpy(image2).permute(2, 0, 1).float().cuda()
-    input_h, input_w = image2.shape[1:]
-    image2 = (image2 / 255. - 0.5) * 2
-
-    [... old lines 154-156 not recoverable from this view ...]
-    image_tensor2 = transform(image2).unsqueeze(1) # [c,1,h,w]
-    frame_tensor1 = repeat(image_tensor1, 'c t h w -> c (repeat t) h w', repeat=8)
-    frame_tensor2 = repeat(image_tensor2, 'c t h w -> c (repeat t) h w', repeat=8)
-    videos = torch.cat([frame_tensor1, frame_tensor2], dim=1).unsqueeze(0)
-    # frame_tensor = torch.cat([frame_tensor1, frame_tensor1], dim=1)
-    # _, filename = os.path.split(file_list[idx*2])
-
-    global model
-    model.cuda()
-
-    [... old lines 167-170 not recoverable from this view ...]
-    if not text_input:
-        prompts = [""]*batch_size
-
-    img = videos[:,:,0] #bchw
-    img_emb = model.embedder(img) ## blc
-    img_emb = model.image_proj_model(img_emb)
-
-    cond_emb = model.get_learned_conditioning(prompts)
-    cond = {"c_crossattn": [torch.cat([cond_emb,img_emb], dim=1)]}
-    if model.model.conditioning_key == 'hybrid':
-        z, hs = get_latent_z_with_hidden_states(model, videos) # b c t h w
-        if loop or interp:
-            img_cat_cond = torch.zeros_like(z)
-            img_cat_cond[:,:,0,:,:] = z[:,:,0,:,:]
-            img_cat_cond[:,:,-1,:,:] = z[:,:,-1,:,:]
-        else:
-            img_cat_cond = z[:,:,:1,:,:]
-            img_cat_cond = repeat(img_cat_cond, 'b c t h w -> b c (repeat t) h w', repeat=z.shape[2])
-        cond["c_concat"] = [img_cat_cond] # b c 1 h w
-    [... old lines 190-280 not recoverable from this view ...]
+
+    with torch.no_grad():
+        seed_everything(seed)
+        video_size = (576, 1024)
+        transform = transforms.Compose([
+            transforms.Resize(min(video_size)),
+            transforms.CenterCrop(video_size),
+            # transforms.ToTensor(),
+            # transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+            ])
+
+        image1 = torch.from_numpy(image1).permute(2, 0, 1).float().cuda()
+        input_h, input_w = image1.shape[1:]
+        image1 = (image1 / 255. - 0.5) * 2
+
+        image2 = torch.from_numpy(image2).permute(2, 0, 1).float().cuda()
+        input_h, input_w = image2.shape[1:]
+        image2 = (image2 / 255. - 0.5) * 2
+
+        # image1 = Image.open(file_list[2*idx]).convert('RGB')
+        image_tensor1 = transform(image1).unsqueeze(1) # [c,1,h,w]
+        # image2 = Image.open(file_list[2*idx+1]).convert('RGB')
+        image_tensor2 = transform(image2).unsqueeze(1) # [c,1,h,w]
+        frame_tensor1 = repeat(image_tensor1, 'c t h w -> c (repeat t) h w', repeat=8)
+        frame_tensor2 = repeat(image_tensor2, 'c t h w -> c (repeat t) h w', repeat=8)
+        videos = torch.cat([frame_tensor1, frame_tensor2], dim=1).unsqueeze(0)
+        # frame_tensor = torch.cat([frame_tensor1, frame_tensor1], dim=1)
+        # _, filename = os.path.split(file_list[idx*2])
+
+        global model
+        model.cuda()
+
+        ddim_sampler = DDIMSampler(model) if not multiple_cond_cfg else DDIMSampler_multicond(model)
+        batch_size = 1
+        fs = torch.tensor([fs], dtype=torch.long, device=model.device)
+
+        if not text_input:
+            prompts = [""]*batch_size
+
+        img = videos[:,:,0] #bchw
+        img_emb = model.embedder(img) ## blc
+        img_emb = model.image_proj_model(img_emb)
+
+        cond_emb = model.get_learned_conditioning(prompts)
+        cond = {"c_crossattn": [torch.cat([cond_emb,img_emb], dim=1)]}
+        if model.model.conditioning_key == 'hybrid':
+            z, hs = get_latent_z_with_hidden_states(model, videos) # b c t h w
+            if loop or interp:
+                img_cat_cond = torch.zeros_like(z)
+                img_cat_cond[:,:,0,:,:] = z[:,:,0,:,:]
+                img_cat_cond[:,:,-1,:,:] = z[:,:,-1,:,:]
+            else:
+                img_cat_cond = z[:,:,:1,:,:]
+                img_cat_cond = repeat(img_cat_cond, 'b c t h w -> b c (repeat t) h w', repeat=z.shape[2])
+            cond["c_concat"] = [img_cat_cond] # b c 1 h w
+
+        if unconditional_guidance_scale != 1.0:
+            if model.uncond_type == "empty_seq":
+                prompts = batch_size * [""]
+                uc_emb = model.get_learned_conditioning(prompts)
+            elif model.uncond_type == "zero_embed":
+                uc_emb = torch.zeros_like(cond_emb)
+            uc_img_emb = model.embedder(torch.zeros_like(img)) ## b l c
+            uc_img_emb = model.image_proj_model(uc_img_emb)
+            uc = {"c_crossattn": [torch.cat([uc_emb,uc_img_emb],dim=1)]}
+            if model.model.conditioning_key == 'hybrid':
+                uc["c_concat"] = [img_cat_cond]
+        else:
+            uc = None
+        #
+        # for i, h in enumerate(hs):
+        #     print("h:", h.shape)
+        #     hs[i] = hs[i][:,:,0,:,:].unsqueeze(2)
+        additional_decode_kwargs = {'ref_context': hs}
+        # additional_decode_kwargs = {'ref_context': None}
+
+        ## we need one more unconditioning image=yes, text=""
+        if multiple_cond_cfg and cfg_img != 1.0:
+            uc_2 = {"c_crossattn": [torch.cat([uc_emb,img_emb],dim=1)]}
+            if model.model.conditioning_key == 'hybrid':
+                uc_2["c_concat"] = [img_cat_cond]
+            kwargs.update({"unconditional_conditioning_img_nonetext": uc_2})
+        else:
+            kwargs.update({"unconditional_conditioning_img_nonetext": None})
+
+        z0 = None
+        cond_mask = None
+
+        batch_variants = []
+        for _ in range(n_samples):
+
+            if z0 is not None:
+                cond_z0 = z0.clone()
+                kwargs.update({"clean_cond": True})
+            else:
+                cond_z0 = None
+            if ddim_sampler is not None:
+
+                samples, _ = ddim_sampler.sample(S=ddim_steps,
+                                                 conditioning=cond,
+                                                 batch_size=batch_size,
+                                                 shape=noise_shape,
+                                                 verbose=False,
+                                                 unconditional_guidance_scale=unconditional_guidance_scale,
+                                                 unconditional_conditioning=uc,
+                                                 eta=ddim_eta,
+                                                 cfg_img=cfg_img,
+                                                 mask=cond_mask,
+                                                 x0=cond_z0,
+                                                 fs=fs,
+                                                 timestep_spacing=timestep_spacing,
+                                                 guidance_rescale=guidance_rescale,
+                                                 **kwargs
+                                                 )
+
+            ## reconstruct from latent to pixel space
+            batch_images = model.decode_first_stage(samples, **additional_decode_kwargs)
+
+            index = list(range(samples.shape[2]))
+            del index[1]
+            del index[-2]
+            samples = samples[:,:,index,:,:]
+            ## reconstruct from latent to pixel space
+            batch_images_middle = model.decode_first_stage(samples, **additional_decode_kwargs)
+            batch_images[:,:,batch_images.shape[2]//2-1:batch_images.shape[2]//2+1] = batch_images_middle[:,:,batch_images.shape[2]//2-2:batch_images.shape[2]//2]
+
+            batch_variants.append(batch_images)
+        ## variants, batch, c, t, h, w
+        batch_variants = torch.stack(batch_variants)
+        # return batch_variants.permute(1, 0, 2, 3, 4, 5)
+
+        prompt_str = prompt.replace("/", "_slash_") if "/" in prompt else prompt
+        prompt_str = prompt_str.replace(" ", "_") if " " in prompt else prompt_str
+        prompt_str=prompt_str[:40]
+        if len(prompt_str) == 0:
+            prompt_str = 'empty_prompt'
+
+        result_dir = "./tmp/"
+        save_videos(batch_image, result_dir, filenames=[prompt_str], fps=8)
+        print(f"Saved in {prompt_str}. Time used: {(time.time() - start):.2f} seconds")
+        model = model.cpu()
+        saved_result_dir = os.path.join(result_dir, f"{prompt_str}.mp4")
+        print("result saved to:", saved_result_dir)
+        return saved_result_dir