Sergidev committed
Commit 6d5ea03 · 1 Parent(s): b9a7bd3
Files changed (1)
  1. modules/pmbl.py +41 -21
modules/pmbl.py CHANGED
@@ -72,7 +72,7 @@ class PMBL:
        except Exception as e:
            print(f"Error preparing model: {e}")
            # Fall back to using a smaller model that's more easily handled
-           return "TheBloke/Llama-2-7B-Chat-GGUF"
+           return None

    def _load_history_sync(self):
        """Load chat history from local file"""
@@ -176,25 +176,16 @@ class PMBL:
        )

        try:
-           # Check if we have a prepared model or need to use a backup
-           if "TheBloke" in self.prepared_model_path:
-               # Use TheBloke model as fallback
-               print("Using fallback model")
-               model = AutoModelForCausalLM.from_pretrained(
-                   self.prepared_model_path,
-                   model_type="llama",
-                   gpu_layers=50,
-                   context_length=n_ctx
-               )
-           else:
-               # Use our merged Qwen model
-               print(f"Loading model from: {self.prepared_model_path}")
-               model = AutoModelForCausalLM.from_pretrained(
-                   model_file=self.prepared_model_path,
-                   model_type="llama",
-                   gpu_layers=50,
-                   context_length=n_ctx
-               )
+           print(f"Loading model from: {self.prepared_model_path}")
+
+           # The correct way to load a local model file with ctransformers
+           model = AutoModelForCausalLM.from_pretrained(
+               "TheBloke/Llama-2-7B-Chat-GGUF",  # This is just a placeholder, we'll use the local file
+               model_file=self.prepared_model_path,  # Specify the actual file to use
+               model_type="llama",
+               gpu_layers=50,
+               context_length=n_ctx
+           )

            # Generate response with streaming
            response = model(
@@ -213,7 +204,36 @@ class PMBL:

        except Exception as e:
            print(f"Error generating response: {e}")
-           yield f"I'm sorry, I encountered an error: {str(e)}. Please try again or contact support."
+
+           # Fall back to the smaller model
+           try:
+               fallback_model = AutoModelForCausalLM.from_pretrained(
+                   "TheBloke/Llama-2-7B-Chat-GGUF",
+                   model_type="llama",
+                   gpu_layers=50,
+                   context_length=n_ctx
+               )
+
+               fallback_response = fallback_model(
+                   system_prompt,
+                   max_new_tokens=1024,
+                   temperature=0.6,
+                   top_p=0.95,
+                   top_k=30,
+                   stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
+                   stream=True
+               )
+
+               # First yield an error message
+               yield f"I encountered an error with the primary model, switching to backup: {str(e)}\n\n"
+
+               # Then yield the fallback model's response
+               for chunk in fallback_response:
+                   yield chunk
+
+           except Exception as fallback_error:
+               # If even the fallback fails, return a simple error message
+               yield f"I'm sorry, both models encountered errors. Original error: {str(e)}. Fallback error: {str(fallback_error)}. Please try again with a simpler query."

    def save_chat(self, prompt, response):
        """Save chat to history"""