"""Streamlit app that looks up excerpts of the SEER Solid Tumor Rules manual.

The script loads a FAISS index from ``DB_FAISS_PATH``, wires it into a
LangChain ``RetrievalQA`` chain, and for each question shows the retrieved
source excerpts together with a link to the matching page of the manual PDF.
"""
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
import streamlit as st

st.set_page_config(page_title="Solid Tumor Chat", page_icon=":hospital:")

# Directory that holds the saved FAISS index files.
DB_FAISS_PATH = '.'

# Single source of truth for the manual's URL; used by the sidebar link and by
# every per-result page link.
MANUAL_URL = "https://seer.cancer.gov/tools/solidtumor/2023/STM_Combined.pdf"

# Streamlit re-executes this script on every widget interaction. Caching the
# vector store avoids reloading the embedding model and index on each click.
# Streamlit versions without ``st.cache_resource`` fall back to no caching.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def get_vectorstore():
    """Load the FAISS vector store saved at ``DB_FAISS_PATH``.

    The index is loaded with the ``sentence-transformers/all-MiniLM-L6-v2``
    embedding model running on CPU.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    # NOTE(review): newer LangChain releases require passing
    # allow_dangerous_deserialization=True here — confirm against the pinned
    # LangChain version before upgrading.
    return FAISS.load_local(DB_FAISS_PATH, embeddings)


vector_store = get_vectorstore()

llm = HuggingFaceHub(
    repo_id="meta-llama/Llama-2-7b-chat-hf",
    model_kwargs={"temperature": 0.5},
)

# 'stuff' puts all retrieved documents into a single LLM prompt; the ten
# best-matching excerpts are retrieved and returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=vector_store.as_retriever(search_kwargs={'k': 10}),
    return_source_documents=True,
)

with st.container():
    st.title("Solid Tumor Rules Chat")

with st.sidebar:
    st.subheader("Find references in solid tumor staging manual.")
    st.markdown(
        "We look into the solid tumor staging manual to find top ten most "
        "relevant excerpts:\n"
        f"- [SEER Solid Tumor Rules]({MANUAL_URL})\n"
    )

# NOTE(review): placement of this disclaimer in the main column (rather than
# the sidebar) is reconstructed — confirm the intended layout.
st.write(
    "This tool is meant to assist healthcare workers to the extent it can. "
    "Please note that the page numbers may be occasionally slightly off, use "
    "the included excerpt to find the reference if this happens."
)
st.markdown("**Ask your question and :red[click 'Find Matches'.]**")
prompt = st.text_input("e.g. How do you code anaplastic meningioma?")

if st.button("Find Matches"):
    if not prompt.strip():
        # Don't send an empty query to the chain.
        st.warning("Please enter a question before searching.")
    else:
        # NOTE(review): only the source documents are displayed; the generated
        # answer['result'] is not shown anywhere.
        answer = qa_chain({"query": prompt})
        for document in answer['source_documents']:
            with st.container():
                # Presumably the stored 'page' metadata is zero-based while the
                # PDF '#page=' fragment counts from 1, hence the +1 — verify
                # against how the index was built.
                page = document.metadata['page'] + 1
                st.markdown(f"[**Page: {page}**]({MANUAL_URL}#page={page})")
                st.write("...")
                st.write(document.page_content)
                st.write("...")
                st.write('---------------------------------\n\n')