Spanicin committed
Commit a60f0f9 · verified · 1 Parent(s): f6d22ad

Update src/facerender/modules/make_animation.py

src/facerender/modules/make_animation.py CHANGED
@@ -145,50 +145,25 @@ def keypoint_transformation(kp_canonical, he, wo_exp=False):
     # return predictions_ts
 
 import torch
-from torch.cuda import CUDAGraph
+from torch.cuda.amp import autocast
 
 def make_animation(source_image, source_semantics, target_semantics,
                    generator, kp_detector, he_estimator, mapping,
                    yaw_c_seq=None, pitch_c_seq=None, roll_c_seq=None,
-                   use_exp=True, device='cuda'):
+                   use_exp=True):
+
+    device = 'cuda'
+    # Move inputs to GPU
     source_image = source_image.to(device)
     source_semantics = source_semantics.to(device)
     target_semantics = target_semantics.to(device)
 
-    # Prepare for CUDA Graph capture
-    with torch.no_grad():
+    with torch.no_grad():  # No gradients needed
         predictions = []
         kp_canonical = kp_detector(source_image)
         he_source = mapping(source_semantics)
         kp_source = keypoint_transformation(kp_canonical, he_source)
 
-        # Use a non-default CUDA stream for graph capture
-        capture_stream = torch.cuda.Stream()
-        graph = CUDAGraph()
-
-        # Warm-up to ensure proper graph capturing
-        torch.cuda.synchronize()
-
-        with torch.cuda.stream(capture_stream):
-            target_semantics_frame = target_semantics[:, 0]
-            he_driving = mapping(target_semantics_frame)
-
-            if yaw_c_seq is not None:
-                he_driving['yaw_in'] = yaw_c_seq[:, 0]
-            if pitch_c_seq is not None:
-                he_driving['pitch_in'] = pitch_c_seq[:, 0]
-            if roll_c_seq is not None:
-                he_driving['roll_in'] = roll_c_seq[:, 0]
-
-            kp_driving = keypoint_transformation(kp_canonical, he_driving)
-            kp_norm = kp_driving
-
-            # Begin capturing the graph
-            graph.capture_begin()
-            out = generator(source_image, kp_source=kp_source, kp_driving=kp_norm)
-            graph.capture_end()
-
-        # Execute the graph on the default stream
         for frame_idx in range(target_semantics.shape[1]):
             target_semantics_frame = target_semantics[:, frame_idx]
             he_driving = mapping(target_semantics_frame)
@@ -203,22 +178,22 @@ def make_animation(source_image, source_semantics, target_semantics,
             kp_driving = keypoint_transformation(kp_canonical, he_driving)
             kp_norm = kp_driving
 
-            # Replay the captured graph
-            with torch.cuda.stream(torch.cuda.current_stream()):
+            # Use mixed precision for faster computation
+            with autocast():
                 out = generator(source_image, kp_source=kp_source, kp_driving=kp_norm)
 
             predictions.append(out['prediction'])
-
-        # Optional: Explicitly synchronize if needed
+
+        # Optional: Explicitly synchronize (use only if necessary)
        torch.cuda.synchronize()
 
+        # Stack predictions into a single tensor
         predictions_ts = torch.stack(predictions, dim=1)
 
     return predictions_ts
 
 
 
-
 class AnimateModel(torch.nn.Module):
     """
     Merge all generator related updates into single model for better multi-gpu usage
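
The net effect of the commit is to drop the hand-rolled CUDA Graph capture/replay path and instead run the per-frame generator call under torch.no_grad() with torch.cuda.amp.autocast() mixed precision, with device now hard-coded to 'cuda' inside the function rather than passed as a keyword argument. For readers unfamiliar with that pattern, below is a minimal, self-contained sketch of autocast inference; the DummyGenerator module, tensor shapes, and variable names are illustrative placeholders, not part of SadTalker's API:

    import torch
    from torch.cuda.amp import autocast  # same import the commit adds

    class DummyGenerator(torch.nn.Module):
        """Placeholder stand-in for the real generator network."""
        def __init__(self):
            super().__init__()
            self.conv = torch.nn.Conv2d(3, 3, kernel_size=3, padding=1)

        def forward(self, x):
            return self.conv(x)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = DummyGenerator().to(device).eval()
    frames = torch.randn(8, 3, 256, 256, device=device)  # fake per-frame inputs

    predictions = []
    with torch.no_grad():  # inference only, no gradients needed
        for idx in range(frames.shape[0]):
            # run the forward pass in mixed precision on GPU; disabled on CPU
            with autocast(enabled=(device == 'cuda')):
                out = model(frames[idx:idx + 1])
            predictions.append(out.float())  # cast back to fp32 before stacking

    predictions_ts = torch.stack(predictions, dim=1)

Two notes on the sketch: recent PyTorch releases spell the same context manager torch.amp.autocast('cuda'), and torch.cuda.amp.autocast, while still functional, emits a deprecation warning there. Also, tensors produced under autocast may be float16, so casting back to float32 before stacking keeps the stacked result's dtype consistent with a non-autocast run.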