File size: 2,615 Bytes
abcfaf4
 
 
 
 
 
cd8a181
abcfaf4
 
 
 
 
 
 
 
 
 
 
f62ef6b
abcfaf4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd8a181
 
 
abcfaf4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
import streamlit as st

# Browser tab title and icon for the app.
st.set_page_config(
    page_title="Solid Tumor Chat",
    page_icon=":hospital:",
)

# Location handed to FAISS.load_local(); "." resolves against the directory
# the app is launched from.
DB_FAISS_PATH = "."

def get_vectorstore():
    """Load the FAISS vector store saved at ``DB_FAISS_PATH``.

    Queries are embedded with the ``sentence-transformers/all-MiniLM-L6-v2``
    model, pinned to the CPU.

    NOTE(review): presumably this is the same embedding model that was used
    when the index was built -- confirm against the indexing script.
    NOTE(review): LangChain's FAISS loader is understood to unpickle part of
    the saved index, so only load index files from a trusted source.

    Returns:
        The store object returned by ``FAISS.load_local``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )
    return FAISS.load_local(DB_FAISS_PATH, embedder)

@st.cache_resource
def _load_qa_resources():
    """Build the vector store, LLM client and retrieval chain once.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the embedding model, the FAISS index and the chain would
    be rebuilt on each button click or text edit. ``st.cache_resource`` keeps
    one shared instance alive across reruns.

    Returns:
        A ``(vector_store, llm, qa_chain)`` tuple.
    """
    store = get_vectorstore()

    # NOTE(review): HuggingFaceHub calls the hosted inference API; presumably
    # the API token is supplied through the environment -- confirm deployment.
    hub_llm = HuggingFaceHub(
        repo_id="meta-llama/Llama-2-7b-chat-hf",
        model_kwargs={"temperature": 0.5},
    )

    chain = RetrievalQA.from_chain_type(
        llm=hub_llm,
        chain_type="stuff",
        # k=10 matches the "top ten most relevant excerpts" promised in the sidebar.
        retriever=store.as_retriever(search_kwargs={"k": 10}),
        # The UI renders the retrieved documents, so they must be returned.
        return_source_documents=True,
    )
    return store, hub_llm, chain


# The module-level names are kept so the rest of the script is unaffected.
vector_store, llm, qa_chain = _load_qa_resources()


# Main page heading.
with st.container():
    st.title("Solid Tumor Rules Chat")

# Sidebar: what the tool searches, a link to the source manual, and a disclaimer.
with st.sidebar:
    st.subheader("Find references in solid tumor staging manual.")
    # The link targets the SEER Solid Tumor Rules PDF (2023 combined manual).
    st.markdown("""
    We look into the solid tumor staging manual to find top ten most relevant excerpts:
    - [SEER Solid Tumor Rules](https://seer.cancer.gov/tools/solidtumor/2023/STM_Combined.pdf)
    """)
    st.write("This tool is meant to assist healthcare workers to the extent it can. Please note that the page numbers may be occasionally slightly off, use the included excerpt to find the reference if this happens.")

st.markdown("**Ask your question and :red[click 'Find Matches'.]**")

prompt = st.text_input("e.g. How do you code anaplastic meningioma?")

if st.button("Find Matches"):
    if not prompt.strip():
        # Skip the retrieval/LLM round trip when there is nothing to search for.
        st.warning("Please enter a question before clicking 'Find Matches'.")
    else:
        answer = qa_chain({"query": prompt})

        # Only the retrieved excerpts are rendered; the rest of the chain
        # output is not shown.
        for doc in answer["source_documents"]:
            with st.container():
                # NOTE(review): metadata["page"] appears to be zero-based (as
                # LangChain's PDF loaders produce -- confirm against the
                # indexing script), while the "#page=" URL fragment is
                # one-based, hence the +1.
                page = doc.metadata["page"] + 1
                page_link = (
                    f"[**Page: {page}**]"
                    "(https://seer.cancer.gov/tools/solidtumor/2023/STM_Combined.pdf"
                    f"#page={page})"
                )
                st.markdown(page_link)
                st.write("...")
                st.write(doc.page_content)
                st.write("...")
                st.write('---------------------------------\n\n')