# paper_generate/llm.py
from typing import Optional, List
# from langchain.llms.utils import enforce_stop_tokens
# import torch
import requests
# import logging
# from transformers import AutoTokenizer, AutoModel, AutoConfig
# logging.basicConfig(filename='chat_log.txt', level=logging.INFO)
DEVICE = "cuda"
FORWARD_KEY = 'fk198719-Pmvv22OqZiovaxRq6YxCzkTcd6UVVX5O0'
# def torch_gc():
# if torch.cuda.is_available():
# with torch.cuda.device(DEVICE):
# torch.cuda.empty_cache()
# torch.cuda.ipc_collect()
class ChatGLM:
    """Thin client for a ChatGLM model served over HTTP at URL; reads the 'response' field of the JSON reply."""

max_length: int = 10000
temperature: float = 0
top_p = 0.9
tokenizer: object = None
model: object = None
history_len: int = 10
history = []
URL = 'http://183.131.3.48:9200'
HEADERS = {'Content-Type': 'application/json'}
@property
def _llm_type(self) -> str:
return "ChatGLM"
    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
# print('\n\n\n\n')
# print('-------------------------------------------------------------------------------------------------------')
# print(' ****** prompt ****** ')
# print(prompt)
if history:
            history = [i for i in history if i[0] is not None]  # drop turns whose user message is None (e.g. the system prompt)
history = history[-self.history_len:]
params = {'tokenizers': self.tokenizer, 'prompt': prompt, 'history': history, 'top_p': self.top_p,
'max_length': self.max_length, 'temperature': self.temperature}
response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
answer = response['response']
# question = prompt.split('question:\n')[-1]
# self.history = self.history+[[prompt, response]]
# print(" ****** GLM_answer ****** ")
# print(answer)
# print('-------------------------------------------------------------------------------------------------------')
# print('\n\n\n\n')
return answer
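# Usage sketch for ChatGLM (assumptions: the server at URL accepts the JSON fields sent in
# __call__ above -- 'tokenizers', 'prompt', 'history', 'top_p', 'max_length', 'temperature' --
# and returns a JSON body containing a 'response' field, which is all this client reads back):
#
# glm = ChatGLM()
# answer = glm("question:\nWhat are the paper's main contributions?",
#              history=[["hello", "Hi, how can I help?"]])
# print(answer)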
class LocalChatGLM:
    """Wraps a locally loaded ChatGLM model; `tokenizer` and `model` must be assigned before calling."""

max_length: int = 10000
temperature: float = 0
top_p = 0.9
tokenizer: object = None
model: object = None
history_len: int = 10
history = []
@property
def _llm_type(self) -> str:
return "ChatGLM"
    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
# print('\n\n\n\n')
# print('-------------------------------------------------------------------------------------------------------')
# print('**************** prompt ****************:')
# print(prompt)
        if history is None:
            history = []
        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=history[-self.history_len:] if self.history_len > 0 else [],
            max_length=self.max_length,
            temperature=self.temperature,
        )
# torch_gc()
# if stop is not None:
# response = enforce_stop_tokens(response, stop)
question = prompt.split('question:\n')[-1]
self.history = self.history+[[question, response]]
# print("*********************** answer **************************:")
# print(response)
# print('-------------------------------------------------------------------------------------------------------')
# print('\n\n\n\n')
return response
# @classmethod
# def load_model(cls,
# model_name_or_path: str = "THUDM/chatglm-6b"):
# tokenizer = AutoTokenizer.from_pretrained(
# model_name_or_path,
# trust_remote_code=True
# )
# if torch.cuda.is_available() and DEVICE.lower().startswith("cuda"):
# model = (
# AutoModel.from_pretrained(
# model_name_or_path,
# trust_remote_code=True)
# .half()
# .cuda()
# )
# else:
# model = (
# AutoModel.from_pretrained(
# model_name_or_path,
# trust_remote_code=True)
# .float()
# .to(DEVICE)
# )
# llm = cls()
# llm.tokenizer = tokenizer
# llm.model = model
# return llm
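# Usage sketch for LocalChatGLM (assumption: the model/tokenizer are prepared exactly as in
# the commented-out load_model above, i.e. a transformers AutoTokenizer/AutoModel pair for
# "THUDM/chatglm-6b" loaded with trust_remote_code=True and assigned before the first call):
#
# local_llm = LocalChatGLM()
# local_llm.tokenizer = tokenizer   # AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
# local_llm.model = model           # AutoModel.from_pretrained(...).half().cuda() on a GPU machine
# answer = local_llm("question:\nWhat is ChatGLM?")
# print(answer, local_llm.history)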
class OpenAI3:
    """Calls gpt-3.5-turbo through the api2d.net OpenAI-compatible chat completions endpoint."""

max_length: int = 10000
temperature: float = 0.2
top_p = 0.9
tokenizer: object = None
model: object = None
history_len: int = 10
history = []
HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
MODEL_NAME = "gpt-3.5-turbo"
@property
def _llm_type(self) -> str:
return "OPENAI3"
def __call__(self,
prompt: str,
history: Optional[List[List[str]]] = None,
stop: Optional[List[str]] = None) -> str:
message = [{"role": "user", "content": prompt}]
params = {"model": self.MODEL_NAME, "messages": message, 'temperature': self.temperature}
response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
answer = response['choices'][0]['message']['content']
# if stop is not None:
# answer = enforce_stop_tokens(answer, stop)
return answer
class OpenAI4:
    """Same as OpenAI3, but targets the gpt-4 model on the api2d.net endpoint."""

max_length: int = 10000
temperature: float = 0.2
top_p = 0.9
tokenizer: object = None
model: object = None
history_len: int = 10
history = []
HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
MODEL_NAME = "gpt-4"
@property
def _llm_type(self) -> str:
return "OPENAI4"
def __call__(self,
prompt: str,
history: Optional[List[List[str]]] = None,
stop: Optional[List[str]] = None) -> str:
message = [{"role": "user", "content": prompt}]
params = {"model": self.MODEL_NAME, "messages": message, 'temperature': self.temperature}
response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
answer = response['choices'][0]['message']['content']
# if stop is not None:
# answer = enforce_stop_tokens(answer, stop)
return answer
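# Minimal demo (assumptions: the api2d.net endpoint above is reachable from this machine and
# the Authorization key in HEADERS is still valid; swap OpenAI3 for OpenAI4 to use gpt-4).
# Nothing below runs on import; execute this module directly to try it.
if __name__ == "__main__":
    llm = OpenAI3()
    print(llm._llm_type)
    print(llm("Summarize what this llm.py module provides in one sentence."))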