Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -8,22 +8,6 @@ import FFV1MT_MS
 import flow_tools
 
 
-print(f"Is CUDA available: {torch.cuda.is_available()}")
-# True
-print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
-# Tesla T4
-
-model = FFV1MT_MS.FFV1DNN()
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-print('Number of parameters: {}'.format(model.num_parameters()))
-model.to(device)
-model_dict = torch.load('Model_example.pth.tar')['state_dict']
-# save model
-model.load_state_dict(model_dict, strict=True)
-model.eval()
-
-
 def process_images(videos, x, y):
     # read video file
     cap = cv2.VideoCapture(videos)
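The block removed above probed CUDA and built the model at import time. torch.cuda.get_device_name(torch.cuda.current_device()) raises on a host without CUDA, and on a ZeroGPU Space (the "Running on Zero" badge above) no GPU is attached when the module is first imported, which is likely why this commit defers the whole block to the __main__ guard added further down. A minimal sketch of a probe that stays safe on CPU-only hosts; the helper name is illustrative, not part of app.py:

import torch

def describe_device() -> str:
    # Query the device name only when CUDA is actually available;
    # torch.cuda.current_device() raises on CPU-only machines.
    if torch.cuda.is_available():
        return torch.cuda.get_device_name(torch.cuda.current_device())
    return "cpu"

print(f"Running on: {describe_device()}")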
@@ -44,8 +28,8 @@ def process_images(videos, x, y):
     images = [torch.from_numpy(img).permute(2, 0, 1).float().to(device).unsqueeze(0) / 255.0 for img in images]
     # if the max size of the image is larger than 1024, resize the image to 1024 with same ratio
     max_size = max(images[0].shape[2], images[0].shape[3])
-    if max_size > 1024:
-        ratio = 1024 / max_size
+    if max_size > 768:
+        ratio = 768 / max_size
         images = [torch.nn.functional.interpolate(img, scale_factor=ratio, mode='bicubic', align_corners=True) for img
                   in images]
     # transform color image to gray image
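This hunk lowers the resize cap from 1024 to 768 pixels on the longer side, plausibly to cut inference time on the CPU host mentioned in the demo description (note that the unchanged comment above still says 1024). A self-contained sketch of the same aspect-preserving downscale with the cap as a parameter; the helper name and default are illustrative:

import torch
import torch.nn.functional as F

def cap_longer_side(img: torch.Tensor, cap: int = 768) -> torch.Tensor:
    # img: (1, C, H, W) float tensor; a single scale factor on both
    # spatial dimensions preserves the aspect ratio.
    longer = max(img.shape[2], img.shape[3])
    if longer > cap:
        ratio = cap / longer
        img = F.interpolate(img, scale_factor=ratio, mode='bicubic', align_corners=True)
    return img

frame = torch.rand(1, 3, 720, 1280)
print(cap_longer_side(frame).shape)  # torch.Size([1, 3, 432, 768])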
@@ -86,15 +70,32 @@ description = "## Introduction(^_^)\n" \
               "We also provide two sliders to adjust the location of the attention visualizer. \n" \
               " **Note**: The demo is running on CPU, so it may take a while to process the video. \n"
 
-
-
-examples = [["example_1.mp4", 62, 56]]
+examples = [["example_1.mp4", 62, 56], ["example_2.mp4", 59, 55], ["example_3.mp4", 50, 50], ["example_4.mp4", 50, 50],
+            ["example_5.mp4", 39, 72]]
+# examples = [["example_1.mp4", 62, 56]]
 md = "![](https://drive.google.com/uc?id=1WBqYsKRwn_78A72MJBrk643l3-gfAssP) \n" \
      "## Author \n" \
      "This project page is developed by Zitang Sun (zitangsun96 @ gmail.com)\n" \
      "## LICENSE \n" \
      "This project is licensed under the terms of the MIT license. \n"
-iface = gr.Interface(fn=process_images,
+
+if __name__ == '__main__':
+    print(f"Is CUDA available: {torch.cuda.is_available()}")
+    # True
+    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+    # Tesla T4
+
+    model = FFV1MT_MS.FFV1DNN()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    print('Number of parameters: {}'.format(model.num_parameters()))
+    model.to(device)
+    model_dict = torch.load('Model_example.pth.tar')['state_dict']
+    # save model
+    model.load_state_dict(model_dict, strict=True)
+    model.eval()
+
+    iface = gr.Interface(fn=process_images,
                      inputs=[gr.Video(label="Upload video or use the example images below"),
                              gr.Slider(0, 100, label='X location of attention visualizer'),
                              gr.Slider(0, 100, label='Y location of attention visualizer')],
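Model construction and checkpoint loading now run only when app.py is executed directly, so importing the module no longer triggers GPU calls or file I/O. A sketch of the loading step as wired above, assuming (as the + lines show) that the .tar checkpoint stores a dict under a 'state_dict' key; map_location is an extra safeguard not present in app.py, so the sketch also loads on CPU-only hosts:

import torch
import FFV1MT_MS  # model definition module, imported at the top of app.py

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FFV1MT_MS.FFV1DNN().to(device)
checkpoint = torch.load('Model_example.pth.tar', map_location=device)
model.load_state_dict(checkpoint['state_dict'], strict=True)
model.eval()  # inference mode: freezes dropout and batch-norm statistics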
@@ -107,4 +108,6 @@ iface = gr.Interface(fn=process_images,
                      article=md,
                      examples=examples)
 
-iface.launch(debug=True)
+    iface.launch(debug=True)
+
+
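With iface.launch(debug=True) indented under the guard, the whole lifecycle (probe, load, serve) is import-safe. A minimal runnable sketch of the same wiring pattern; the identity function body and the output component are placeholders, since the actual outputs of app.py fall outside this diff:

import gradio as gr

def process_images(video, x, y):
    # placeholder: app.py runs the motion model and attention visualizer here
    return video

if __name__ == '__main__':
    iface = gr.Interface(fn=process_images,
                         inputs=[gr.Video(label="Upload video"),
                                 gr.Slider(0, 100, label='X location'),
                                 gr.Slider(0, 100, label='Y location')],
                         outputs=gr.Video(label="Result"))
    iface.launch(debug=True)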