DawnC committed
Commit 8e90922
Parent: 14ee6e4

Update device_manager.py

Files changed (1): device_manager.py (+25, -49)
device_manager.py CHANGED
@@ -2,6 +2,7 @@ from functools import wraps
 import torch
 import os
 import logging
+import spaces
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -21,60 +22,35 @@ class DeviceManager:
 
         self._initialized = True
         self._current_device = None
-        self.initialize_zero_gpu()
-
-    def initialize_zero_gpu(self):
-        """Initialize ZeroGPU"""
         try:
-            # Check whether we are running in a Hugging Face Spaces environment
            if os.environ.get('SPACE_ID'):
-                # Try to initialize ZeroGPU
-                os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-                # Set the required environment variables
-                os.environ['ZERO_GPU'] = '1'
-                logger.info("ZeroGPU environment initialized")
-        except Exception as e:
-            logger.warning(f"Failed to initialize ZeroGPU environment: {e}")
-
-    def check_zero_gpu_availability(self):
-        """Check whether ZeroGPU is available"""
-        try:
-            if os.environ.get('SPACE_ID') and os.environ.get('ZERO_GPU') == '1':
-                # Make sure the CUDA runtime environment is set up correctly
-                if torch.cuda.is_available():
-                    torch.cuda.init()
-                    return True
+                # Initialize through the spaces GPU wrapper
+                @spaces.GPU
+                def init_gpu():
+                    return torch.device('cuda')
+                self._current_device = init_gpu()
+                logger.info("ZeroGPU initialized successfully")
+            else:
+                self._current_device = torch.device('cpu')
         except Exception as e:
-            logger.warning(f"ZeroGPU check failed: {e}")
-        return False
+            logger.warning(f"Failed to initialize ZeroGPU: {e}")
+            self._current_device = torch.device('cpu')
 
     def get_optimal_device(self):
-        """Return the best available device"""
-        if self._current_device is None:
-            if self.check_zero_gpu_availability():
-                try:
-                    self._current_device = torch.device('cuda')
-                    logger.info("Using ZeroGPU")
-                    # Run a small CUDA operation to verify the device
-                    torch.zeros(1).cuda()
-                except Exception as e:
-                    logger.warning(f"Failed to use ZeroGPU: {e}")
-                    self._current_device = torch.device('cpu')
-                    logger.info("Fallback to CPU")
-            else:
-                self._current_device = torch.device('cpu')
-                logger.info("Using CPU (ZeroGPU not available)")
         return self._current_device
 
-    def move_to_device(self, tensor_or_model):
-        """Move a tensor or model to the optimal device"""
-        device = self.get_optimal_device()
+def device_handler(func):
+    """Decorator for handling device placement with ZeroGPU support"""
+    @spaces.GPU
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
        try:
-            if hasattr(tensor_or_model, 'to'):
-                return tensor_or_model.to(device)
-        except Exception as e:
-            logger.warning(f"Failed to move to {device}, falling back to CPU: {e}")
-            self._current_device = torch.device('cpu')
-            if hasattr(tensor_or_model, 'to'):
-                return tensor_or_model.to('cpu')
-            return tensor_or_model
+            return await func(*args, **kwargs)
+        except RuntimeError as e:
+            if "out of memory" in str(e) or "CUDA" in str(e):
+                logger.warning("ZeroGPU unavailable, falling back to CPU")
+                device_mgr = DeviceManager()
+                device_mgr._current_device = torch.device('cpu')
+                return await func(*args, **kwargs)
+            raise e
+    return wrapper
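
For context, a hedged usage sketch (not part of this commit): the new module-level device_handler decorator appears intended to wrap an async inference entry point so that ZeroGPU allocation and the CPU fallback happen transparently. The run_inference function and its tensor argument below are hypothetical, and the sketch assumes spaces.GPU tolerates the async callable the same way the commit's decorator presumes.

# Hypothetical usage sketch; run_inference is illustrative, not from the repo.
import asyncio
import torch
from device_manager import DeviceManager, device_handler

@device_handler
async def run_inference(batch: torch.Tensor) -> torch.Tensor:
    # Place the input on whichever device DeviceManager settled on
    # (ZeroGPU when spaces.GPU grants one, CPU otherwise).
    device = DeviceManager().get_optimal_device()
    return (batch.to(device) * 2).cpu()

if __name__ == "__main__":
    print(asyncio.run(run_inference(torch.ones(4))))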