RAGOndevice

Running on Zero

cutechicken committed 23 days ago

Commit

a188372

•

1 Parent(s): 4c60e0e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,18 +14,24 @@ import spaces
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_ID = "CohereForAI/c4ai-command-r-plus-08-2024"
 class ModelManager:
     def __init__(self):
         self.model = None
         self.tokenizer = None
         self.setup_model()
     def setup_model(self):
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(
                 MODEL_ID,
                 token=HF_TOKEN,
-                trust_remote_code=True
             )
             self.model = AutoModelForCausalLM.from_pretrained(
                 MODEL_ID,
@@ -33,22 +39,11 @@ class ModelManager:
                 torch_dtype=torch.float16,
                 device_map="auto",
                 trust_remote_code=True,
-                low_cpu_mem_usage=True
             )
         except Exception as e:
-            print(f"Error loading model: {e}")
-            # Fallback to basic loading without device_map
-            try:
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    MODEL_ID,
-                    token=HF_TOKEN,
-                    torch_dtype=torch.float16,
-                    trust_remote_code=True
-                )
-            except Exception as e:
-                raise Exception(f"Model loading failed completely: {e}")
 class ChatHistory:
     def __init__(self):

 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_ID = "CohereForAI/c4ai-command-r-plus-08-2024"
+os.environ["TRANSFORMERS_CACHE"] = "/persistent/transformers_cache"
+os.environ["TORCH_HOME"] = "/persistent/torch_cache"
+os.environ["HF_HOME"] = "/persistent/huggingface"
 class ModelManager:
     def __init__(self):
+        self.cache_dir = "/persistent/model_cache"
         self.model = None
         self.tokenizer = None
         self.setup_model()
     def setup_model(self):
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(
                 MODEL_ID,
                 token=HF_TOKEN,
+                trust_remote_code=True,
+                cache_dir=self.cache_dir
             )
             self.model = AutoModelForCausalLM.from_pretrained(
                 MODEL_ID,
                 torch_dtype=torch.float16,
                 device_map="auto",
                 trust_remote_code=True,
+                low_cpu_mem_usage=True,
+                cache_dir=self.cache_dir
             )
         except Exception as e:
+            raise Exception(f"Model loading failed: {e}")
 class ChatHistory:
     def __init__(self):