JBHF committed on
Commit
1e7caaa
·
verified ·
1 Parent(s): 022c268

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # JB:
2
+ # LangChainDeprecationWarning: Importing embeddings from langchain is deprecated.
3
+ # Importing from langchain will no longer be supported as of langchain==0.2.0.
4
+ # Please import from langchain-community instead:
5
+ # `from langchain_community.embeddings import FastEmbedEmbeddings`.
6
+ # To install langchain-community run `pip install -U langchain-community`.
7
+ from langchain_community.embeddings import FastEmbedEmbeddings
8
+
9
+ import os
10
+ import streamlit as st
11
+ from langchain_groq import ChatGroq
12
+ from langchain_community.document_loaders import WebBaseLoader
13
+ from langchain_community.embeddings import OllamaEmbeddings
14
+
15
+ # JB:
16
+ from langchain.embeddings import FastEmbedEmbeddings
17
+
18
+ # from langchain_community.vectorstores import FAISS
19
+ # from langchain.vectorstores import Chroma
20
+ from langchain_community.vectorstores import Chroma
21
+
22
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
23
+ from langchain.chains.combine_documents import create_stuff_documents_chain
24
+ from langchain_core.prompts import ChatPromptTemplate
25
+ from langchain.chains import create_retrieval_chain
26
+ import time
27
+ from dotenv import load_dotenv
28
+
# Load variables from a local .env file (if one exists) into the process
# environment, so secrets can be configured outside of source control.
load_dotenv()

# Read the Groq API key from the environment instead of embedding it in the
# source. The key is deliberately never printed: stdout ends up in logs.
# NOTE(review): any key that was ever committed to this file must be treated
# as compromised and rotated in the Groq console.
groq_api_key = os.environ.get("GROQ_API_KEY", "").strip()
if not groq_api_key:
    # Fail early with a readable message rather than an opaque auth error
    # on the first request to the model.
    st.error(
        "GROQ_API_KEY is not set. Define it in the environment (or in a "
        ".env file) and reload the app."
    )
    st.stop()
34
+
35
+
# Build the retrieval index only when this session does not have one yet.
# Every intermediate object is kept in st.session_state, and the "vector"
# key doubles as the "already initialised" marker, so it is written last.
if "vector" not in st.session_state:
    state = st.session_state

    # FastEmbed embeddings (the original version of this app used
    # OllamaEmbeddings here).
    state["embeddings"] = FastEmbedEmbeddings()

    # Download the source essay.
    state["loader"] = WebBaseLoader("https://paulgraham.com/greatwork.html")
    state["docs"] = state["loader"].load()

    # Cut the essay into 1000-character chunks that overlap by 200.
    state["text_splitter"] = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    state["documents"] = state["text_splitter"].split_documents(state["docs"])

    # Chroma vector store (the original version used FAISS). For an example
    # app built on ChromaDB, see:
    #   https://github.com/vndee/local-rag-example/blob/main/rag.py
    #   https://raw.githubusercontent.com/vndee/local-rag-example/main/rag.py
    state["vector"] = Chroma.from_documents(
        state["documents"],
        state["embeddings"],
    )
54
+
55
+
# Page heading.
st.title(
    "Literature Based Research (LBR) - Alexander Unzicker and Jan Bours - Chat with Docs - Groq Edition (Very Fast!) "
)

# Groq-hosted chat model used to generate the answers.
llm = ChatGroq(
    model_name="mixtral-8x7b-32768",
    groq_api_key=groq_api_key,
)

# Prompt with two placeholders: {context} and {input}. The answer-rendering
# code further down sends the user's question under the "input" key.
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context.
Think step by step before providing a detailed answer.
I will tip you $200 if the user finds the answer helpful.
<context>
{context}
</context>
Question: {input}""")

# Retriever over the vector store that was saved in the session state.
retriever = st.session_state.vector.as_retriever()

# Chain that combines documents with the prompt and passes them to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: retriever first, then the document chain.
retrieval_chain = create_retrieval_chain(retriever, document_chain)
78
+
# Free-text question typed by the user. A dedicated name is used so the
# ChatPromptTemplate bound to `prompt` earlier in the script is not shadowed.
user_question = st.text_input("Input your prompt here")

# Only query the chain once the user has actually entered something.
if user_question:
    # perf_counter() measures elapsed wall-clock time. process_time() counts
    # CPU time only and excludes time spent waiting, so it under-reports a
    # call that mostly waits on the network.
    started = time.perf_counter()
    response = retrieval_chain.invoke({"input": user_question})
    print(f"Response time: {time.perf_counter() - started}")

    # The generated answer.
    st.write(response["answer"])

    # Collapsible section listing the chunks returned under "context".
    with st.expander("Document Similarity Search"):
        for doc in response["context"]:
            st.write(doc.page_content)
            st.write("--------------------------------")