initial test
Browse files- app.py +9 -1
- video_highlight_detector.py +48 -9
app.py
CHANGED
@@ -8,6 +8,8 @@ from typing import Tuple, Optional
|
|
8 |
import torch
|
9 |
from pathlib import Path
|
10 |
import time
|
|
|
|
|
11 |
|
12 |
from video_highlight_detector import (
|
13 |
load_model,
|
@@ -159,5 +161,11 @@ def create_ui(examples_path: str):
|
|
159 |
return app
|
160 |
|
161 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
app = create_ui("video_spec.json")
|
163 |
-
app.launch(
|
|
|
8 |
import torch
|
9 |
from pathlib import Path
|
10 |
import time
|
11 |
+
import torch
|
12 |
+
|
13 |
|
14 |
from video_highlight_detector import (
|
15 |
load_model,
|
|
|
161 |
return app
|
162 |
|
163 |
if __name__ == "__main__":
    # GPU-only application: fail fast, before any UI is constructed,
    # when no CUDA device is present.
    if not torch.cuda.is_available():
        raise RuntimeError("This application requires a GPU to run")

    # Force CUDA context creation up front and drop any cached
    # allocations so startup failures surface here.
    torch.cuda.init()
    torch.cuda.empty_cache()

    app = create_ui("video_spec.json")
    app.launch()
|
video_highlight_detector.py
CHANGED
@@ -732,35 +732,74 @@ class BatchedVideoHighlightDetector:
|
|
732 |
|
733 |
def load_model(
|
734 |
checkpoint_path: Optional[str] = None,
|
735 |
-
base_model_id: str = "HuggingFaceTB/
|
736 |
device: str = "cuda"
|
737 |
):
|
738 |
"""Load the model and processor."""
|
739 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
740 |
video_target_size = 384
|
741 |
-
|
742 |
processor = AutoProcessor.from_pretrained(base_model_id)
|
743 |
-
# Configure the image processor
|
744 |
processor.image_processor.size = {"longest_edge": video_target_size}
|
745 |
processor.image_processor.do_resize = True
|
746 |
processor.image_processor.do_image_splitting = False
|
747 |
|
|
|
|
|
|
|
|
|
|
|
748 |
if checkpoint_path:
|
749 |
model = SmolVLMForConditionalGeneration.from_pretrained(
|
750 |
checkpoint_path,
|
751 |
-
|
752 |
-
device_map=device
|
753 |
)
|
754 |
else:
|
755 |
-
model =
|
756 |
base_model_id,
|
757 |
-
|
758 |
-
device_map=device
|
759 |
)
|
760 |
|
761 |
return model, processor
|
762 |
|
763 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
764 |
def main():
|
765 |
checkpoint_path = "/fsx/miquel/smolvlmvideo/checkpoints/final-visionUnfrozen-balanced/checkpoint-6550"
|
766 |
base_model_id = "HuggingFaceTB/SmolVLM-2.2B-Instruct"
|
|
|
732 |
|
733 |
def load_model(
    checkpoint_path: Optional[str] = None,
    base_model_id: str = "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    device: str = "cuda"
):
    """Load the SmolVLM model and its processor.

    Args:
        checkpoint_path: Optional local checkpoint directory; when given it
            is used for the model weights instead of ``base_model_id``.
        base_model_id: Hub id used for the processor and, absent a
            checkpoint, for the model weights.
        device: Device map passed to ``from_pretrained`` (default "cuda").

    Returns:
        A ``(model, processor)`` tuple.

    Raises:
        RuntimeError: If ``device == "cuda"`` but CUDA is not available.
    """
    if device == "cuda" and not torch.cuda.is_available():
        raise RuntimeError("CUDA requested but not available")

    if device == "cuda":
        # Drop cached allocations and force CUDA context creation now, so
        # initialization failures surface here rather than mid-inference.
        torch.cuda.empty_cache()
        torch.cuda.init()

    # Resize frames so the longest edge is 384 px; image splitting is
    # disabled because whole video frames are fed to the model.
    video_target_size = 384
    processor = AutoProcessor.from_pretrained(base_model_id)
    processor.image_processor.size = {"longest_edge": video_target_size}
    processor.image_processor.do_resize = True
    processor.image_processor.do_image_splitting = False

    # Both branches previously issued identical from_pretrained calls that
    # differed only in the first argument; collapse them into one call.
    # A provided checkpoint takes precedence over the Hub id.
    model = SmolVLMForConditionalGeneration.from_pretrained(
        checkpoint_path or base_model_id,
        torch_dtype=torch.bfloat16,
        device_map=device,
    )

    return model, processor
|
770 |
|
771 |
|
772 |
+
# def load_model(
|
773 |
+
# checkpoint_path: Optional[str] = None,
|
774 |
+
# base_model_id: str = "HuggingFaceTB/SmolVLM-2.2B-Instruct",
|
775 |
+
# device: str = "cuda"
|
776 |
+
# ):
|
777 |
+
# """Load the model and processor."""
|
778 |
+
# # For demonstration, we set the target size
|
779 |
+
# video_target_size = 384
|
780 |
+
|
781 |
+
# processor = AutoProcessor.from_pretrained(base_model_id)
|
782 |
+
# # Configure the image processor
|
783 |
+
# processor.image_processor.size = {"longest_edge": video_target_size}
|
784 |
+
# processor.image_processor.do_resize = True
|
785 |
+
# processor.image_processor.do_image_splitting = False
|
786 |
+
|
787 |
+
# if checkpoint_path:
|
788 |
+
# model = SmolVLMForConditionalGeneration.from_pretrained(
|
789 |
+
# checkpoint_path,
|
790 |
+
# torch_dtype=torch.bfloat16,
|
791 |
+
# device_map=device
|
792 |
+
# )
|
793 |
+
# else:
|
794 |
+
# model = SmolVLMForConditionalGeneration.from_pretrained(
|
795 |
+
# base_model_id,
|
796 |
+
# torch_dtype=torch.bfloat16,
|
797 |
+
# device_map=device
|
798 |
+
# )
|
799 |
+
|
800 |
+
# return model, processor
|
801 |
+
|
802 |
+
|
803 |
def main():
|
804 |
checkpoint_path = "/fsx/miquel/smolvlmvideo/checkpoints/final-visionUnfrozen-balanced/checkpoint-6550"
|
805 |
base_model_id = "HuggingFaceTB/SmolVLM-2.2B-Instruct"
|