from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts import ChatPromptTemplate
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
import streamlit as st

your_endpoint_url = "https://kp4xdy196cw81uf3.us-east-1.aws.endpoints.huggingface.cloud"
token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]  # Hugging Face API token read from Streamlit secrets

# LLM served by a dedicated Hugging Face inference endpoint
llm = HuggingFaceEndpoint(
    endpoint_url=your_endpoint_url,
    huggingfacehub_api_token=token,
    task="text-generation",
    max_new_tokens=128,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,  # near-deterministic sampling
    repetition_penalty=1.03,
)
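
# Optional smoke test (assumes the endpoint is up; the prompt string is illustrative):
# print(llm.invoke("Hello, who are you?"))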


def chat_template_prompt():
    template = """
Do not repeat questions and do not generate an answer for the user/human.

You are a helpful hotel booking assistant.
Below is an instruction that describes a task.
Write a response that appropriately completes the request.
Reply with the most helpful and logical answer. During the conversation you need to ask the user
the following questions to complete the hotel booking task.
1) Where would you like to stay and when?
2) How many people are staying in the room?
3) Do you prefer any amenities like breakfast included or gym?
4) What is your name, your email address and phone number?

When the booking task is completed, respond with "Thank you for choosing us.".

{history}
"""

    system_prompt = SystemMessagePromptTemplate.from_template(template)
    human_prompt = HumanMessagePromptTemplate.from_template("{input}")
    chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])
    return chat_prompt
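
# Example of inspecting the assembled prompt (the history/input values here are hypothetical):
# print(chat_template_prompt().format(history="", input="Hi, I need a room in Paris."))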


def chain():
    chat_prompt = chat_template_prompt()
    # Window memory keeps only the last k=3 exchanges in the {history} slot
    memory = ConversationBufferWindowMemory(k=3)
    llm_chain = LLMChain(llm=llm, memory=memory, prompt=chat_prompt)
    return llm_chain
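

# A minimal sketch of wiring the chain into a Streamlit chat UI. The session-state
# key and widget layout are assumptions, not part of the original listing.
if "llm_chain" not in st.session_state:
    st.session_state.llm_chain = chain()  # build the chain once per session

user_input = st.chat_input("Ask about your hotel booking")
if user_input:
    with st.chat_message("user"):
        st.write(user_input)
    # predict() fills {input}; the window memory supplies {history}
    answer = st.session_state.llm_chain.predict(input=user_input)
    with st.chat_message("assistant"):
        st.write(answer)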