from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
import streamlit as st

# Browser-tab title and icon for the app.
st.set_page_config(
    page_title="Solid Tumor Chat",
    page_icon=":hospital:",
)

# Folder handed to FAISS.load_local(); "." is relative to the directory the
# app is launched from.
DB_FAISS_PATH = "."

def get_vectorstore():
    """Load the FAISS vector store saved under ``DB_FAISS_PATH``.

    The store is opened with the ``sentence-transformers/all-MiniLM-L6-v2``
    embedding model, pinned to the CPU.

    NOTE(review): presumably this has to be the same embedding model the
    index was built with -- confirm against the indexing script.

    Returns:
        The vector store object returned by ``FAISS.load_local``.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )
    return FAISS.load_local(DB_FAISS_PATH, embedding_model)

@st.cache_resource
def _load_qa_resources():
    """Build the vector store, LLM client and QA chain, cached by Streamlit.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the embedding model and the FAISS index
    would be reloaded each time the user presses the button.
    ``st.cache_resource`` keeps the built objects alive between reruns.

    NOTE(review): cached resources are shared by all sessions of the app;
    this assumes the chain is only read from, never mutated -- confirm.

    Returns:
        tuple: ``(vector_store, llm, qa_chain)``.
    """
    store = get_vectorstore()
    hub_llm = HuggingFaceHub(
        repo_id="meta-llama/Llama-2-7b-chat-hf",
        model_kwargs={"temperature": 0.5},
    )
    chain = RetrievalQA.from_chain_type(
        llm=hub_llm,
        chain_type='stuff',
        # k=10 matches the "top ten most relevant excerpts" promised in the
        # sidebar text.
        retriever=store.as_retriever(search_kwargs={'k': 10}),
        # The UI renders the retrieved excerpts, so they must be returned.
        return_source_documents=True,
    )
    return store, hub_llm, chain


# Module-level names are kept so the rest of the script is unaffected.
vector_store, llm, qa_chain = _load_qa_resources()


# Single source of truth for the manual's location; used for the sidebar link
# and for the per-excerpt deep links.
MANUAL_PDF_URL = "https://seer.cancer.gov/tools/solidtumor/2023/STM_Combined.pdf"

# --- Page header -------------------------------------------------------------
with st.container():
    st.title("Solid Tumor Rules Chat")

# --- Sidebar: what the tool does and where the excerpts come from ------------
with st.sidebar:
    st.subheader("Find references in solid tumor staging manual.")
    st.markdown(f"""
    We look into the solid tumor staging manual to find top ten most relevant excerpts:
    - [SEER Solid Tumor Rules]({MANUAL_PDF_URL})
    """)
    st.write("This tool is meant to assist healthcare workers to the extent it can. Please note that the page numbers may be occasionally slightly off, use the included excerpt to find the reference if this happens.")

# --- Question input ----------------------------------------------------------
st.markdown("**Ask your question and :red[click 'Find Matches'.]**")

prompt = st.text_input("e.g. How do you code anaplastic meningioma?")

# --- Results -----------------------------------------------------------------
if st.button("Find Matches"):
    query = prompt.strip()
    if not query:
        # Without this guard an empty box would still trigger a full
        # retrieval + LLM round trip.
        st.warning("Please enter a question first.")
    else:
        answer = qa_chain({"query": query})

        # Only the retrieved excerpts are rendered; each one gets a link to
        # its page in the manual followed by the excerpt text.
        for doc in answer['source_documents']:
            with st.container():
                page_index = doc.metadata.get('page')
                if page_index is not None:
                    # Presumably the stored page number is 0-based (as written
                    # by common PDF loaders) while the "#page=" URL fragment
                    # is 1-based, hence the +1 -- TODO confirm against the
                    # script that built the index.
                    page = page_index + 1
                    st.markdown(f"[**Page: {page}**]({MANUAL_PDF_URL}#page={page})")
                st.write("...")
                st.write(doc.page_content)
                st.write("...")
                st.write('---------------------------------\n\n')