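"""Streamlit chatbot: upload .txt/.pdf files, chat over their contents
(OpenAI embeddings + Chroma retrieval), or summarize them with LangChain."""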
import streamlit as st
import os

from langchain import OpenAI, LLMChain, PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
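# Bumping "file_uploader_key" mounts a fresh file_uploader widget, which is
# how the Clear button below discards previously uploaded files.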
if "file_uploader_key" not in st.session_state: | |
st.session_state["file_uploader_key"] = 0 | |
if "uploaded_files" not in st.session_state: | |
st.session_state["uploaded_files"] = [] | |
# Prompt Template
template = """You are a chatbot having a conversation with a human.
Given the following extracted parts of a long document and a question, create a final answer.

{context}

{chat_history}
Human: {human_input}
Chatbot:"""

# Init Prompt
prompt = PromptTemplate(
    input_variables=["chat_history", "human_input", "context"], template=template
)
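# {chat_history} is filled in by the chain's memory; {context} and
# {human_input} are supplied on each call.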
a = st.container()
with a:
    st.title("CHATBOT")

openai_api_key = st.text_input("OpenAI API Key", type="password")
if openai_api_key:
    # Build the LLM and embedding clients from the user-supplied key.
    def load_models():
        model = OpenAI(temperature=0.0, openai_api_key=openai_api_key)
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        return model, embedding

    llm, embedding = load_models()
    def build_chain():
        # Sliding-window memory: keep only the last k=3 exchanges in the prompt.
        memory = ConversationBufferWindowMemory(
            memory_key="chat_history", input_key="human_input", return_messages=True, k=3
        )
        return LLMChain(llm=llm, prompt=prompt, memory=memory)

    # Keep the chain (and its memory) in session_state so the conversation
    # survives Streamlit's rerun-on-every-interaction execution model.
    if "llm_chain" not in st.session_state:
        st.session_state.llm_chain = build_chain()
    llm_chain = st.session_state.llm_chain
    summarize_template = """Write a concise summary of the given documents:
{text}"""
    summarize_PROMPT = PromptTemplate(template=summarize_template, input_variables=["text"])
    llm_summarize = load_summarize_chain(llm=llm, chain_type="map_reduce", map_prompt=summarize_PROMPT)
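    # map_reduce first summarizes each chunk with map_prompt, then combines the
    # partial summaries, so documents longer than the context window still work.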
# chain({"input_documents": docs}, return_only_outputs=True) | |
# llm_summarize = load_summarize_chain(llm, chain_type="map_reduce") | |
    ########################################
    ####### CHATBOT interface ##############
    ########################################
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Display chat messages from history on app rerun
    with a:
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
    # Chunks of the uploaded documents, filled in by the sidebar below
    documents = []
    with st.sidebar:
        uploaded_files = st.file_uploader(
            "Upload file",
            accept_multiple_files=True,
            key=st.session_state["file_uploader_key"],
            type=["txt", "pdf"],
        )
        if uploaded_files:
            st.session_state["uploaded_files"] = uploaded_files
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=3000, chunk_overlap=10, separators=[" ", ",", "\n"]
            )
            os.makedirs("docs", exist_ok=True)  # the loaders below read from disk
            for file in uploaded_files:
                if file.name.endswith(".pdf"):
                    # Save the uploaded PDF to disk so PyPDFLoader can open it
                    temp_file_path = os.path.join("docs", file.name)
                    with open(temp_file_path, "wb") as temp_file:
                        temp_file.write(file.read())
                    loader = PyPDFLoader(temp_file_path)
                elif file.name.endswith(".txt"):
                    # Save the uploaded text file to disk and load it with TextLoader
                    filename = os.path.join("docs", "text.txt")
                    with open(filename, "wb") as f:
                        f.write(file.getbuffer())
                    loader = TextLoader(filename, autodetect_encoding=True)
                documents.extend(loader.load())
            documents = text_splitter.split_documents(documents)
            # Embed the chunks into an in-memory Chroma vector store for retrieval
            docsearch = Chroma.from_documents(documents, embedding=embedding)
    ########################################
    ########## SIDEBAR #####################
    ########################################
    # Callback for the Clear button: reset the chat and the chain's memory,
    # and bump the uploader key so an empty file_uploader is mounted on the
    # rerun Streamlit performs automatically after the callback.
    def clear_msg():
        st.session_state.messages = []
        st.session_state.llm_chain = build_chain()
        st.session_state["file_uploader_key"] += 1
    if uploaded_files:
        if st.sidebar.button("Summarize"):
            with a:
                query = "Summarize uploaded documents"
                st.chat_message("user").markdown(query)
                llm_chain.memory.chat_memory.add_user_message(query)
                # Add user message to chat history
                st.session_state.messages.append({"role": "user", "content": query})
                response = llm_summarize.run(documents)
                with st.chat_message("assistant"):
                    st.markdown(response)
                llm_chain.memory.chat_memory.add_ai_message(response)
                # Add assistant response to chat history
                st.session_state.messages.append({"role": "assistant", "content": response})

    st.sidebar.button("Clear", on_click=clear_msg)
    ########################################
    ####### React to user input ############
    ########################################
    with a:
        if query := st.chat_input():
            # Display user message in chat message container
            st.chat_message("user").markdown(query)
            # Add user message to chat history
            st.session_state.messages.append({"role": "user", "content": query})
            if documents:
                # Retrieve the chunks most relevant to the question and pass
                # their text to the prompt as context
                docs = docsearch.similarity_search(query)
                context = "\n\n".join(doc.page_content for doc in docs)
            else:
                context = "No context provided."
            response = llm_chain.run({"context": context, "human_input": query})
            # Display assistant response in chat message container
            with st.chat_message("assistant"):
                st.markdown(response)
            # Add assistant response to chat history
            st.session_state.messages.append({"role": "assistant", "content": response})
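
# A minimal way to run this locally (assuming the file is saved as app.py;
# the package list is a best guess for this import set):
#   pip install streamlit langchain openai chromadb pypdf tiktoken
#   streamlit run app.py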