JohnSmith9982 committed on
Commit
c21f04b
1 Parent(s): f3b0f10

Upload models.py

Browse files
Files changed (1) hide show
  1. modules/models.py +16 -17
modules/models.py CHANGED
@@ -84,9 +84,9 @@ class OpenAIClient(BaseLLMModel):
84
  usage_data = self._get_billing_data(usage_url)
85
  except Exception as e:
86
  logging.error(f"获取API使用情况失败:" + str(e))
87
- return f"**获取API使用情况失败**"
88
  rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
89
- return f"**本月使用金额** \u3000 ${rounded_usage}"
90
  except requests.exceptions.ConnectTimeout:
91
  status_text = (
92
  STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
@@ -96,7 +96,7 @@ class OpenAIClient(BaseLLMModel):
96
  status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
97
  return status_text
98
  except Exception as e:
99
- logging.error(f"获取API使用情况失败:" + str(e))
100
  return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
101
 
102
  def set_token_upper_limit(self, new_upper_limit):
@@ -105,7 +105,7 @@ class OpenAIClient(BaseLLMModel):
105
  def set_key(self, new_access_key):
106
  self.api_key = new_access_key.strip()
107
  self._refresh_header()
108
- msg = f"API密钥更改为了{hide_middle_chars(self.api_key)}"
109
  logging.info(msg)
110
  return msg
111
 
@@ -197,7 +197,7 @@ class OpenAIClient(BaseLLMModel):
197
  try:
198
  chunk = json.loads(chunk[6:])
199
  except json.JSONDecodeError:
200
- print(f"JSON解析错误,收到的内容: {chunk}")
201
  error_msg += chunk
202
  continue
203
  if chunk_length > 6 and "delta" in chunk["choices"][0]:
@@ -235,25 +235,21 @@ class ChatGLM_Client(BaseLLMModel):
235
  quantified = False
236
  if "int4" in model_name:
237
  quantified = True
238
- if quantified:
239
- model = AutoModel.from_pretrained(
240
  model_source, trust_remote_code=True
241
- ).half()
242
- else:
243
- model = AutoModel.from_pretrained(
244
- model_source, trust_remote_code=True
245
- ).half()
246
  if torch.cuda.is_available():
247
  # run on CUDA
248
  logging.info("CUDA is available, using CUDA")
249
- model = model.cuda()
250
  # mps加速还存在一些问题,暂时不使用
251
  elif system_name == "Darwin" and model_path is not None and not quantified:
252
  logging.info("Running on macOS, using MPS")
253
  # running on macOS and model already downloaded
254
- model = model.to("mps")
255
  else:
256
  logging.info("GPU is not available, using CPU")
 
257
  model = model.eval()
258
  CHATGLM_MODEL = model
259
 
@@ -483,8 +479,11 @@ class XMBot_Client(BaseLLMModel):
483
  "data": question
484
  }
485
  response = requests.post(self.url, json=data)
486
- response = json.loads(response.text)
487
- return response["data"], len(response["data"])
 
 
 
488
 
489
 
490
 
@@ -497,7 +496,7 @@ def get_model(
497
  top_p=None,
498
  system_prompt=None,
499
  ) -> BaseLLMModel:
500
- msg = f"模型设置为了: {model_name}"
501
  model_type = ModelType.get_type(model_name)
502
  lora_selector_visibility = False
503
  lora_choices = []
 
84
  usage_data = self._get_billing_data(usage_url)
85
  except Exception as e:
86
  logging.error(f"获取API使用情况失败:" + str(e))
87
+ return i18n("**获取API使用情况失败**")
88
  rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
89
+ return i18n("**本月使用金额** ") + f"\u3000 ${rounded_usage}"
90
  except requests.exceptions.ConnectTimeout:
91
  status_text = (
92
  STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
 
96
  status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
97
  return status_text
98
  except Exception as e:
99
+ logging.error(i18n("获取API使用情况失败:") + str(e))
100
  return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
101
 
102
  def set_token_upper_limit(self, new_upper_limit):
 
105
  def set_key(self, new_access_key):
106
  self.api_key = new_access_key.strip()
107
  self._refresh_header()
108
+ msg = i18n("API密钥更改为了") + f"{hide_middle_chars(self.api_key)}"
109
  logging.info(msg)
110
  return msg
111
 
 
197
  try:
198
  chunk = json.loads(chunk[6:])
199
  except json.JSONDecodeError:
200
+ print(i18n("JSON解析错误,收到的内容: ") + f"{chunk}")
201
  error_msg += chunk
202
  continue
203
  if chunk_length > 6 and "delta" in chunk["choices"][0]:
 
235
  quantified = False
236
  if "int4" in model_name:
237
  quantified = True
238
+ model = AutoModel.from_pretrained(
 
239
  model_source, trust_remote_code=True
240
+ )
 
 
 
 
241
  if torch.cuda.is_available():
242
  # run on CUDA
243
  logging.info("CUDA is available, using CUDA")
244
+ model = model.half().cuda()
245
  # mps加速还存在一些问题,暂时不使用
246
  elif system_name == "Darwin" and model_path is not None and not quantified:
247
  logging.info("Running on macOS, using MPS")
248
  # running on macOS and model already downloaded
249
+ model = model.half().to("mps")
250
  else:
251
  logging.info("GPU is not available, using CPU")
252
+ model = model.float()
253
  model = model.eval()
254
  CHATGLM_MODEL = model
255
 
 
479
  "data": question
480
  }
481
  response = requests.post(self.url, json=data)
482
+ try:
483
+ response = json.loads(response.text)
484
+ return response["data"], len(response["data"])
485
+ except Exception as e:
486
+ return response.text, len(response.text)
487
 
488
 
489
 
 
496
  top_p=None,
497
  system_prompt=None,
498
  ) -> BaseLLMModel:
499
+ msg = i18n("模型设置为了:") + f" {model_name}"
500
  model_type = ModelType.get_type(model_name)
501
  lora_selector_visibility = False
502
  lora_choices = []