v5
modules/pmbl.py  CHANGED  (+41 -21)
@@ -72,7 +72,7 @@ class PMBL:
         except Exception as e:
             print(f"Error preparing model: {e}")
             # Fall back to using a smaller model that's more easily handled
-            return
+            return None
 
     def _load_history_sync(self):
         """Load chat history from local file"""
@@ -176,25 +176,16 @@ class PMBL:
         )
 
         try:
-
-
-
-
-
-
-
-
-
-
-            else:
-                # Use our merged Qwen model
-                print(f"Loading model from: {self.prepared_model_path}")
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_file=self.prepared_model_path,
-                    model_type="llama",
-                    gpu_layers=50,
-                    context_length=n_ctx
-                )
+            print(f"Loading model from: {self.prepared_model_path}")
+
+            # The correct way to load a local model file with ctransformers
+            model = AutoModelForCausalLM.from_pretrained(
+                "TheBloke/Llama-2-7B-Chat-GGUF",  # This is just a placeholder, we'll use the local file
+                model_file=self.prepared_model_path,  # Specify the actual file to use
+                model_type="llama",
+                gpu_layers=50,
+                context_length=n_ctx
+            )
 
             # Generate response with streaming
             response = model(
@@ -213,7 +204,36 @@
 
         except Exception as e:
             print(f"Error generating response: {e}")
-
+
+            # Fall back to the smaller model
+            try:
+                fallback_model = AutoModelForCausalLM.from_pretrained(
+                    "TheBloke/Llama-2-7B-Chat-GGUF",
+                    model_type="llama",
+                    gpu_layers=50,
+                    context_length=n_ctx
+                )
+
+                fallback_response = fallback_model(
+                    system_prompt,
+                    max_new_tokens=1024,
+                    temperature=0.6,
+                    top_p=0.95,
+                    top_k=30,
+                    stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
+                    stream=True
+                )
+
+                # First yield an error message
+                yield f"I encountered an error with the primary model, switching to backup: {str(e)}\n\n"
+
+                # Then yield the fallback model's response
+                for chunk in fallback_response:
+                    yield chunk
+
+            except Exception as fallback_error:
+                # If even the fallback fails, return a simple error message
+                yield f"I'm sorry, both models encountered errors. Original error: {str(e)}. Fallback error: {str(fallback_error)}. Please try again with a simpler query."
 
     def save_chat(self, prompt, response):
         """Save chat to history"""
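A note on the loading call in the second hunk: ctransformers' AutoModelForCausalLM.from_pretrained accepts either a Hub repo id or a local model file path as its first argument, with model_type, gpu_layers, and context_length passed as config options, and the returned model streams tokens when called with stream=True. A minimal, self-contained sketch of that usage; the path, prompt, and N_CTX value are illustrative stand-ins, not values taken from pmbl.py:

from ctransformers import AutoModelForCausalLM

# Illustrative stand-ins for self.prepared_model_path and n_ctx in pmbl.py.
LOCAL_GGUF = "models/prepared-model.gguf"
N_CTX = 4096

# Load the local GGUF file; model_type, gpu_layers and context_length mirror
# the options used in the commit (gpu_layers only has an effect with a
# CUDA/Metal build of ctransformers).
llm = AutoModelForCausalLM.from_pretrained(
    LOCAL_GGUF,
    model_type="llama",
    gpu_layers=50,
    context_length=N_CTX,
)

# Stream tokens, analogous to the response = model(...) call in the diff.
for token in llm("User: hello\nAssistant:", max_new_tokens=64, stream=True):
    print(token, end="", flush=True)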
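The third hunk turns the generation error handler into a two-step fallback: report the primary failure to the caller, stream from the smaller Hub-hosted model, and only give up if that also fails. The same shape can be sketched as a standalone generator; stream_with_fallback and load_primary are hypothetical names used only for illustration, and the pinned model_file is an assumption, since the commit does not say which quantization to pull from the fallback repo:

from ctransformers import AutoModelForCausalLM

FALLBACK_REPO = "TheBloke/Llama-2-7B-Chat-GGUF"   # same fallback repo as the commit
FALLBACK_FILE = "llama-2-7b-chat.Q4_K_M.gguf"     # assumed quantization; not named in the commit


def load_primary():
    # Hypothetical stand-in for however pmbl.py loads its prepared model;
    # raising here exercises the fallback path.
    raise RuntimeError("primary model unavailable")


def stream_with_fallback(prompt, n_ctx=4096):
    """Yield response chunks, switching to a smaller model if the primary fails."""
    try:
        model = load_primary()
        yield from model(prompt, max_new_tokens=1024, stream=True)
    except Exception as primary_error:
        # Surface the failure to the caller first, as the commit does.
        yield f"Primary model failed ({primary_error}); switching to backup.\n\n"
        try:
            fallback = AutoModelForCausalLM.from_pretrained(
                FALLBACK_REPO,
                model_file=FALLBACK_FILE,
                model_type="llama",
                gpu_layers=50,
                context_length=n_ctx,
            )
            yield from fallback(prompt, max_new_tokens=1024, stream=True)
        except Exception as fallback_error:
            yield (f"Both models failed. Primary error: {primary_error}. "
                   f"Fallback error: {fallback_error}.")


# A caller drains the generator chunk by chunk (this downloads the fallback
# model from the Hub on first run):
for chunk in stream_with_fallback("User: hello\nAssistant:"):
    print(chunk, end="", flush=True)

Yielding the error text instead of raising keeps the streaming contract intact, so whatever consumes the generator does not need a separate error path.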