from typing import Any, List, Literal, Mapping, Optional

import requests
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.messages import AIMessage
from langchain_core.runnables import RunnablePassthrough


def custom_chain_with_history(llm, memory):
    # Persona prompt: {context} is filled by the retriever, {chat_history} by
    # the conversation memory, and {question} by the user's input.
    prompt = PromptTemplate.from_template("""<|system|>
You are a college student at Gunadarma University, majoring in Informatics / Computer Science. Your name is Jonathan Jordan, but people call you Jojo. You have expertise in Artificial Intelligence and Data Science. You are also good at Data Engineering with Airflow and Apache Spark. You currently live in Jakarta, Indonesia. You are 21 years old. Your birthday is Friday, 21 June 2002.

Below is your detailed personal information:
{context}

Respond to the user with the following chat history between you and the user:
{chat_history}
<|interviewer|>
{question}
<|you|>
""")

    def prompt_memory(_):
        # Serialize the buffered conversation into the model's chat markup.
        t = ""
        for x in memory.chat_memory.messages:
            t += f"<|assistant|>\n{x.content}\n" if isinstance(x, AIMessage) else f"<|user|>\n{x.content}\n"
        return t

    def format_docs(docs):
        # Render the retrieved documents as a numbered list for the prompt.
        return "\n".join(f"{i + 1}. {d.page_content}" for i, d in enumerate(docs))

    # `db` is the vector store (e.g. FAISS) defined at module level elsewhere.
    return (
        {
            "chat_history": prompt_memory,
            "context": db.as_retriever(search_type="similarity", search_kwargs={"k": 8}) | format_docs,
            "question": RunnablePassthrough(),
        }
        | prompt
        | llm
    )


class CustomLLM(LLM):
    """LangChain LLM wrapper around the Hugging Face Inference API."""

    repo_id: str
    api_token: str
    model_type: Literal["text2text-generation", "text-generation"]
    max_new_tokens: Optional[int] = None
    temperature: float = 0.001
    timeout: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    repetition_penalty: Optional[float] = None
    stop: List[str] = []

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        headers = {"Authorization": f"Bearer {self.api_token}"}
        api_url = f"https://api-inference.huggingface.co/models/{self.repo_id}"
        parameters_dict = {
            "max_new_tokens": self.max_new_tokens,
            "temperature": self.temperature,
            "timeout": self.timeout,
            "top_p": self.top_p,
            "top_k": self.top_k,
            "repetition_penalty": self.repetition_penalty,
            # Honor per-call stop sequences, falling back to the configured ones.
            "stop": stop or self.stop,
        }
        if self.model_type == "text-generation":
            # Causal models echo the prompt by default; return only the completion.
            parameters_dict["return_full_text"] = False

        payload = {
            "inputs": prompt,
            "parameters": parameters_dict,
            "options": {"wait_for_model": True},
        }
        response = requests.post(api_url, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()[0]["generated_text"]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "repo_id": self.repo_id,
            "model_type": self.model_type,
            "stop_sequences": self.stop,
            "max_new_tokens": self.max_new_tokens,
            "temperature": self.temperature,
            "timeout": self.timeout,
            "top_p": self.top_p,
            "top_k": self.top_k,
            "repetition_penalty": self.repetition_penalty,
        }
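

# --- Usage sketch (illustrative only, not part of the original module). ---
# The repo id, token, stop sequence, and question below are placeholder
# assumptions; it also assumes the module-level vector store `db` used by
# `custom_chain_with_history` has been built earlier (e.g. FAISS over a
# personal-profile document).
if __name__ == "__main__":
    llm = CustomLLM(
        repo_id="HuggingFaceH4/zephyr-7b-beta",  # placeholder model id
        api_token="hf_...",                      # your Hugging Face API token
        model_type="text-generation",
        max_new_tokens=256,
        stop=["<|interviewer|>"],                # assumed stop marker from the prompt markup
    )
    memory = ConversationBufferMemory()
    chain = custom_chain_with_history(llm, memory)

    question = "What do you use Apache Spark for?"
    answer = chain.invoke(question)

    # Persist the turn so the next call sees it in {chat_history}.
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(answer)
    print(answer)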