prudhvirajdowluri committed on
Commit
d82794d
β€’
1 Parent(s): 752126c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +101 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
3
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
4
+ from dotenv import load_dotenv
5
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
6
+ from llama_index.core import Settings
7
+ import os
8
+ import base64
9
+
10
# Load environment variables (expects HF_TOKEN in a .env file or the environment).
load_dotenv()

# Configure global LlamaIndex settings: a remote Llama-3 model via the
# Hugging Face Inference API for generation, and a local BGE model for embeddings.
Settings.llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=3900,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=1024,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5"
)

# Directories for the persisted vector index and for uploaded source documents.
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure both directories exist before any read/write happens.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)
34
def displayPDF(file):
    """Render the PDF at path *file* inline as a base64 data-URI iframe."""
    with open(file, "rb") as pdf_file:
        encoded = base64.b64encode(pdf_file.read()).decode('utf-8')
    # Embed the whole document in the page; large PDFs inflate the HTML payload.
    iframe = f'<iframe src="data:application/pdf;base64,{encoded}" width="100%" height="600" type="application/pdf"></iframe>'
    st.markdown(iframe, unsafe_allow_html=True)
39
+
40
def data_ingestion():
    """Build a vector index from every document in DATA_DIR and persist it.

    Side effects: reads all files under DATA_DIR, writes the serialized
    index to PERSIST_DIR (overwriting any previously persisted index).
    """
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    # Fix: the original also created an unused ``StorageContext.from_defaults()``
    # local here; ``VectorStoreIndex.from_documents`` builds its own context.
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
45
+
46
def handle_query(query):
    """Answer *query* against the persisted vector index; return plain text."""
    # Reload the index that data_ingestion() persisted to disk.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    qa_template = ChatPromptTemplate.from_messages([
        (
            "user",
            """You are a Q&A assistant. Created by Prudhvi Raj Dowluri [linkdin](https://www.linkedin.com/in/prudhvi-raj-dowluri-412616221/) an AI Engineer. Your primary objective is to provide accurate and helpful answers based on the instructions and context provided.If a question falls outside the given context or scope, kindly guide the user to ask questions that align with the provided context.
   Context:
   {context_str}
   Question:
   {query_str}
   """
        )
    ])

    answer = index.as_query_engine(text_qa_template=qa_template).query(query)

    # Normalize the possible response shapes into a plain string.
    if hasattr(answer, 'response'):
        return answer.response
    if isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    return "Sorry, I couldn't find an answer."
70
+
71
# --- Streamlit app initialization ---
st.title("Your PDF Assistant 📄")
st.markdown("Get insights from your data – just chat!👇")

# Seed the chat history with a greeting on first page load.
if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', "content": 'I can answer your questions about a PDF. Just upload it!'}]

with st.sidebar:
    st.markdown("**Created by [Prudhvi](https://www.linkedin.com/in/prudhvi-raj-dowluri-412616221/)**")
    st.title(':blue[Get Started]:')
    uploaded_file = st.file_uploader("Upload your PDF and Click Submit")
    if st.button("Submit"):
        # Fix: st.file_uploader returns None until a file is chosen, so the
        # original crashed with AttributeError when Submit was clicked early.
        if uploaded_file is None:
            st.warning("Please upload a PDF before clicking Submit.")
        else:
            with st.spinner("Processing..."):
                # Save under DATA_DIR so data_ingestion() picks the file up.
                filepath = os.path.join(DATA_DIR, "saved_pdf.pdf")
                with open(filepath, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                # displayPDF(filepath)  # Optionally preview the uploaded PDF
                data_ingestion()  # Re-index every time a new file is uploaded
                st.success("Done")

user_prompt = st.chat_input("Ask me anything about the data inside the document:")
if user_prompt:
    st.session_state.messages.append({'role': 'user', "content": user_prompt})
    response = handle_query(user_prompt)
    st.session_state.messages.append({'role': 'assistant', "content": response})

# Render the full conversation, oldest message first.
for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.write(message['content'])
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ python-dotenv
3
+ llama-index
4
+ llama-index-embeddings-huggingface
5
+ llama-index-llms-huggingface