whyumesh committed on
Commit
f0b1947
·
verified ·
1 Parent(s): 81838bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -5,10 +5,16 @@ from PIL import Image
5
  import cv2
6
  import numpy as np
7
  import os
8
-
9
  import torch
10
 
11
- # Optimize for A100
 
 
 
 
 
 
 
12
  torch.backends.cuda.matmul.allow_tf32 = True
13
  torch.backends.cudnn.allow_tf32 = True
14
 
@@ -17,14 +23,16 @@ if torch.cuda.is_available():
17
  torch.set_default_tensor_type('torch.cuda.FloatTensor')
18
 
19
 
20
-
21
  def load_model():
22
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
  try:
 
 
 
24
  model = Qwen2VLForConditionalGeneration.from_pretrained(
25
  "Qwen/Qwen2-VL-2B-Instruct",
26
- torch_dtype=torch.float16, # Use float16 for faster inference on GPU
27
- device_map="auto" # This will automatically handle multi-GPU setups
 
28
  )
29
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
30
  return model, processor, device
 
5
  import cv2
6
  import numpy as np
7
  import os
 
8
  import torch
9
 
10
+ print(f"PyTorch version: {torch.__version__}")
11
+ print(f"CUDA available: {torch.cuda.is_available()}")
12
+ if torch.cuda.is_available():
13
+ print(f"CUDA version: {torch.version.cuda}")
14
+ print(f"Device count: {torch.cuda.device_count()}")
15
+ print(f"Current device: {torch.cuda.current_device()}")
16
+ print(f"Device name: {torch.cuda.get_device_name()}")
17
+
18
  torch.backends.cuda.matmul.allow_tf32 = True
19
  torch.backends.cudnn.allow_tf32 = True
20
 
 
23
  torch.set_default_tensor_type('torch.cuda.FloatTensor')
24
 
25
 
 
26
  def load_model():
 
27
  try:
28
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
+ print(f"Using device: {device}")
30
+
31
  model = Qwen2VLForConditionalGeneration.from_pretrained(
32
  "Qwen/Qwen2-VL-2B-Instruct",
33
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
34
+ device_map="auto",
35
+ low_cpu_mem_usage=True
36
  )
37
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
38
  return model, processor, device