from typing import Optional, List
# from langchain.llms.utils import enforce_stop_tokens
# import torch
import requests
# import logging
# from transformers import AutoTokenizer, AutoModel, AutoConfig

# logging.basicConfig(filename='chat_log.txt', level=logging.INFO)

DEVICE = "cuda"
FORWARD_KEY = 'fk198719-Pmvv22OqZiovaxRq6YxCzkTcd6UVVX5O0'


# def torch_gc():
#     if torch.cuda.is_available():
#         with torch.cuda.device(DEVICE):
#             torch.cuda.empty_cache()
#             torch.cuda.ipc_collect()
class ChatGLM:
    """Wrapper around a remote ChatGLM service reached over HTTP."""

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    URL = 'http://183.131.3.48:9200'
    HEADERS = {'Content-Type': 'application/json'}

    def _llm_type(self) -> str:
        return "ChatGLM"
    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        # print('\n\n\n\n')
        # print('-------------------------------------------------------------------------------------------------------')
        # print(' ****** prompt ****** ')
        # print(prompt)
        if history:
            history = [i for i in history if i[0] is not None]  # drop the system message
            history = history[-self.history_len:]
        params = {'tokenizers': self.tokenizer, 'prompt': prompt, 'history': history, 'top_p': self.top_p,
                  'max_length': self.max_length, 'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['response']
        # question = prompt.split('question:\n')[-1]
        # self.history = self.history + [[prompt, response]]
        # print(" ****** GLM_answer ****** ")
        # print(answer)
        # print('-------------------------------------------------------------------------------------------------------')
        # print('\n\n\n\n')
        return answer
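
# A minimal usage sketch for the remote wrapper (assumes the service at URL is
# reachable, accepts the params built in __call__, and returns {'response': ...};
# the question text below is made up for illustration):
#
#   glm = ChatGLM()
#   reply = glm("Briefly introduce ChatGLM.",
#               history=[["hello", "Hi, how can I help?"]])
#   print(reply)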
class LocalChatGLM:
    """Wrapper around a locally loaded ChatGLM model; tokenizer and model must be set by the caller."""

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []

    def _llm_type(self) -> str:
        return "ChatGLM"
    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        # print('\n\n\n\n')
        # print('-------------------------------------------------------------------------------------------------------')
        # print('**************** prompt ****************:')
        # print(prompt)
        history = history or []  # avoid a mutable default argument
        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=history[-self.history_len:] if self.history_len > 0 else [],
            max_length=self.max_length,
            temperature=self.temperature,
        )
        # torch_gc()
        # if stop is not None:
        #     response = enforce_stop_tokens(response, stop)
        question = prompt.split('question:\n')[-1]
        self.history = self.history + [[question, response]]
        # print("*********************** answer **************************:")
        # print(response)
        # print('-------------------------------------------------------------------------------------------------------')
        # print('\n\n\n\n')
        return response
    # @classmethod
    # def load_model(cls,
    #                model_name_or_path: str = "THUDM/chatglm-6b"):
    #     tokenizer = AutoTokenizer.from_pretrained(
    #         model_name_or_path,
    #         trust_remote_code=True
    #     )
    #     if torch.cuda.is_available() and DEVICE.lower().startswith("cuda"):
    #         model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .half()
    #             .cuda()
    #         )
    #     else:
    #         model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .float()
    #             .to(DEVICE)
    #         )
    #     llm = cls()
    #     llm.tokenizer = tokenizer
    #     llm.model = model
    #     return llm
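
# A minimal sketch of wiring up LocalChatGLM by hand, mirroring the commented-out
# load_model above (assumes `transformers` and `torch` are installed, the
# "THUDM/chatglm-6b" weights are available, and a CUDA GPU is present for the
# fp16 `.half().cuda()` path):
#
#   from transformers import AutoTokenizer, AutoModel
#   llm = LocalChatGLM()
#   llm.tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
#   llm.model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
#   print(llm("question:\nBriefly introduce yourself.", history=[]))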
class OpenAI3:
    """gpt-3.5-turbo accessed through the api2d OpenAI-compatible chat completions endpoint."""

    max_length: int = 10000
    temperature: float = 0.2
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json',
               'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-3.5-turbo"

    def _llm_type(self) -> str:
        return "OPENAI3"
    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME, "messages": message, 'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer
class OpenAI4:
    """gpt-4 accessed through the api2d OpenAI-compatible chat completions endpoint."""

    max_length: int = 10000
    temperature: float = 0.2
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json',
               'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-4"

    def _llm_type(self) -> str:
        return "OPENAI4"
    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME, "messages": message, 'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer
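
# A minimal usage sketch for the chat-completions wrappers (assumes the api2d
# proxy above is reachable and the bearer key is still valid; the prompt is
# made up for illustration):
#
#   gpt35 = OpenAI3()
#   gpt4 = OpenAI4()
#   print(gpt35("Summarize ChatGLM in one sentence."))
#   print(gpt4("Summarize ChatGLM in one sentence."))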