JohnSmith9982 committed
Commit: c21f04b
Parent(s): f3b0f10

Upload models.py

Files changed: modules/models.py (+16 -17)
modules/models.py CHANGED

@@ -84,9 +84,9 @@ class OpenAIClient(BaseLLMModel):
                 usage_data = self._get_billing_data(usage_url)
             except Exception as e:
                 logging.error(f"获取API使用情况失败:" + str(e))
-                return f"**获取API使用情况失败**"
+                return i18n("**获取API使用情况失败**")
             rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
-            return f"**本月使用金额** \u3000 ${rounded_usage}"
+            return i18n("**本月使用金额** ") + f"\u3000 ${rounded_usage}"
         except requests.exceptions.ConnectTimeout:
             status_text = (
                 STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
@@ -96,7 +96,7 @@ class OpenAIClient(BaseLLMModel):
             status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
             return status_text
         except Exception as e:
-            logging.error(f"获取API使用情况失败:" + str(e))
+            logging.error(i18n("获取API使用情况失败:") + str(e))
             return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
 
     def set_token_upper_limit(self, new_upper_limit):
@@ -105,7 +105,7 @@ class OpenAIClient(BaseLLMModel):
     def set_key(self, new_access_key):
         self.api_key = new_access_key.strip()
         self._refresh_header()
-        msg = f"API密钥更改为了{hide_middle_chars(self.api_key)}"
+        msg = i18n("API密钥更改为了") + f"{hide_middle_chars(self.api_key)}"
         logging.info(msg)
         return msg
 
@@ -197,7 +197,7 @@ class OpenAIClient(BaseLLMModel):
                 try:
                     chunk = json.loads(chunk[6:])
                 except json.JSONDecodeError:
-                    print(f"JSON解析错误,收到的内容: {chunk}")
+                    print(i18n("JSON解析错误,收到的内容: ") + f"{chunk}")
                     error_msg += chunk
                     continue
                 if chunk_length > 6 and "delta" in chunk["choices"][0]:
@@ -235,25 +235,21 @@ class ChatGLM_Client(BaseLLMModel):
         quantified = False
         if "int4" in model_name:
             quantified = True
-
-            model = AutoModel.from_pretrained(
+        model = AutoModel.from_pretrained(
             model_source, trust_remote_code=True
-            )
-        else:
-            model = AutoModel.from_pretrained(
-                model_source, trust_remote_code=True
-            ).half()
+        )
         if torch.cuda.is_available():
             # run on CUDA
             logging.info("CUDA is available, using CUDA")
-            model = model.cuda()
+            model = model.half().cuda()
         # mps加速还存在一些问题,暂时不使用
         elif system_name == "Darwin" and model_path is not None and not quantified:
             logging.info("Running on macOS, using MPS")
             # running on macOS and model already downloaded
-            model = model.to("mps")
+            model = model.half().to("mps")
         else:
             logging.info("GPU is not available, using CPU")
+            model = model.float()
         model = model.eval()
         CHATGLM_MODEL = model
 
@@ -483,8 +479,11 @@ class XMBot_Client(BaseLLMModel):
             "data": question
         }
         response = requests.post(self.url, json=data)
-        response = json.loads(response.text)
-        return response["data"], len(response["data"])
+        try:
+            response = json.loads(response.text)
+            return response["data"], len(response["data"])
+        except Exception as e:
+            return response.text, len(response.text)
 
 
 
@@ -497,7 +496,7 @@ def get_model(
     top_p=None,
     system_prompt=None,
 ) -> BaseLLMModel:
-    msg = f"模型设置为了: {model_name}"
+    msg = i18n("模型设置为了:") + f" {model_name}"
     model_type = ModelType.get_type(model_name)
     lora_selector_visibility = False
     lora_choices = []
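Note on the i18n() calls introduced above: every OpenAIClient and get_model change routes a user-facing string through i18n() instead of a hard-coded f-string. The diff does not show how i18n() itself is implemented, so the following is only a minimal sketch of the usual shape of such a helper; the locale directory, JSON file format, and LANGUAGE variable are assumptions, not this repository's verified code.

import json
import os

LANG = os.environ.get("LANGUAGE", "en_US")  # assumed language switch

try:
    # Assumed layout: one JSON file per locale, mapping source strings
    # (here, the original Chinese) to translated strings.
    with open(f"locale/{LANG}.json", encoding="utf-8") as f:
        _TRANSLATIONS = json.load(f)
except FileNotFoundError:
    _TRANSLATIONS = {}

def i18n(key):
    # Fall back to the original string so untranslated keys still
    # render instead of raising.
    return _TRANSLATIONS.get(key, key)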
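The ChatGLM_Client hunk replaces two branch-specific from_pretrained() calls with a single load followed by per-device dtype casts: half precision on CUDA and MPS, full precision on CPU, and no recast of already-quantized int4 weights on macOS. Below is a self-contained sketch of that pattern; the checkpoint name is a placeholder, platform.system() stands in for the repo's system_name variable, and the model_path check from the diff is omitted.

import logging
import platform

import torch
from transformers import AutoModel

model_source = "THUDM/chatglm-6b"    # placeholder checkpoint name
quantified = "int4" in model_source  # int4 checkpoints ship pre-quantized

# Load once, regardless of device.
model = AutoModel.from_pretrained(model_source, trust_remote_code=True)

if torch.cuda.is_available():
    logging.info("CUDA is available, using CUDA")
    model = model.half().cuda()      # fp16 halves GPU memory use
elif platform.system() == "Darwin" and not quantified:
    logging.info("Running on macOS, using MPS")
    model = model.half().to("mps")   # MPS also runs fp16
else:
    logging.info("GPU is not available, using CPU")
    model = model.float()            # CPU kernels generally need fp32

model = model.eval()                 # inference mode: disable dropout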
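The XMBot_Client hunk wraps the response handling in try/except so that a non-JSON reply degrades to returning the raw body instead of raising. The same pattern in isolation; the URL and payload shape are placeholders, and the diff's broad "except Exception" is narrowed here to the errors this parse can actually raise:

import json

import requests

def ask(url, question):
    response = requests.post(url, json={"data": question})
    try:
        payload = json.loads(response.text)
        # Expected shape: {"data": "..."}; anything else falls through.
        answer = payload["data"]
    except (json.JSONDecodeError, KeyError, TypeError):
        # Non-JSON body or unexpected structure: surface the raw text.
        answer = response.text
    return answer, len(answer)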