Update app.py
Browse files
app.py
CHANGED
@@ -1,93 +1,90 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
-
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
5 |
-
import streamlit as st
|
6 |
-
import google.generativeai as genai
|
7 |
-
from langchain.vectorstores import FAISS
|
8 |
-
from langchain_google_genai import ChatGoogleGenerativeAI
|
9 |
-
from langchain.chains.question_answering import load_qa_chain
|
10 |
-
from langchain.prompts import PromptTemplate
|
11 |
-
from dotenv import load_dotenv
|
12 |
-
|
13 |
-
# Load environment variables via python-dotenv, then hand the API key to the
# Google Generative AI SDK. The original also had a bare
# `os.getenv("GOOGLE_API_KEY")` statement whose result was discarded; it had
# no effect and has been dropped.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
16 |
-
|
17 |
-
# read all pdf files and return text
|
18 |
-
|
19 |
-
|
20 |
-
def get_pdf_text(pdf_docs):
    """Return the text of every page of every PDF, concatenated in order.

    Args:
        pdf_docs: Iterable of PDF sources; each item is passed unchanged to
            ``PdfReader``.

    Returns:
        One string holding the extracted text of all pages, with no
        separator inserted between pages or documents. An empty iterable
        yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        for page in PdfReader(pdf).pages:
            # Guard in case extract_text() yields None for a page without a
            # text layer; concatenating None would raise TypeError.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
|
27 |
-
|
28 |
-
# split text into chunks
|
29 |
-
|
30 |
-
|
31 |
-
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split *text* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The full text to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``. Defaults to
            10000, the value that was previously hard-coded.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
            Defaults to 1000, the value that was previously hard-coded.

    Returns:
        Whatever ``split_text`` returns (a list of strings, per the original
        inline comment).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
|
36 |
-
|
37 |
-
# get embeddings for each chunk
|
38 |
-
|
39 |
|
40 |
-
|
41 |
-
embeddings = GoogleGenerativeAIEmbeddings(
|
42 |
-
model="models/embedding-001") # type: ignore
|
43 |
-
vector_store = FAISS.from_texts(chunks, embedding=embeddings)
|
44 |
-
vector_store.save_local("faiss_index")
|
45 |
|
|
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
You are an expert economist,answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
|
50 |
-
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
|
51 |
-
Context:\n {context}?\n
|
52 |
-
Question: \n{question}\n
|
53 |
-
|
54 |
-
Answer:
|
55 |
-
"""
|
56 |
-
|
57 |
-
model = ChatGoogleGenerativeAI(model="gemini-pro",
|
58 |
-
client=genai,
|
59 |
-
temperature=0.3,
|
60 |
-
)
|
61 |
-
prompt = PromptTemplate(template=prompt_template,
|
62 |
-
input_variables=["context", "question"])
|
63 |
-
chain = load_qa_chain(llm=model, chain_type="stuff", prompt=prompt)
|
64 |
-
return chain
|
65 |
-
|
66 |
-
|
67 |
-
def clear_chat_history():
    """Reset ``st.session_state.messages`` to the single assistant greeting."""
    st.session_state.messages = [
        {"role": "assistant", "content": "upload some pdfs and ask me a question"},
    ]
|
70 |
-
|
71 |
-
|
72 |
-
def user_input(user_question):
|
73 |
-
embeddings = GoogleGenerativeAIEmbeddings(
|
74 |
-
model="models/embedding-001") # type: ignore
|
75 |
-
|
76 |
-
new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
|
77 |
-
docs = new_db.similarity_search(user_question)
|
78 |
-
|
79 |
-
chain = get_conversational_chain()
|
80 |
|
81 |
-
|
82 |
-
{"input_documents": docs, "question": user_question}, return_only_outputs=True, )
|
83 |
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
|
88 |
def main():
|
89 |
st.set_page_config(
|
90 |
-
page_title="
|
91 |
page_icon="🤖"
|
92 |
)
|
93 |
|
|
|
1 |
+
"""
|
2 |
+
Install the Google AI Python SDK
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
$ pip install google-generativeai
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
See the getting started guide for more information:
|
7 |
+
https://ai.google.dev/gemini-api/docs/get-started/python
|
8 |
+
"""
|
9 |
|
10 |
+
import os
|
11 |
+
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
import google.generativeai as genai
|
|
|
14 |
|
15 |
+
# Configure the SDK with the key from the environment. Indexing os.environ
# (rather than .get) raises KeyError at import time if GEMINI_API_KEY is unset.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
|
16 |
+
|
17 |
+
def upload_to_gemini(path, mime_type=None):
    """Upload the file at *path* to Gemini and return the uploaded-file handle.

    See https://ai.google.dev/gemini-api/docs/prompting_with_media

    Args:
        path: Location of the file to upload; forwarded to
            ``genai.upload_file``.
        mime_type: Optional MIME type forwarded to ``genai.upload_file``.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    uploaded = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
    return uploaded
|
25 |
+
|
26 |
+
def wait_for_files_active(files, poll_interval=10, timeout=None):
    """Block until every given file has left the "PROCESSING" state.

    Some files uploaded to the Gemini API need to be processed before they
    can be used as prompt inputs. The status is read from the file's
    ``state`` field, re-fetched with ``genai.get_file``.

    This is a simple blocking polling loop. Production code should probably
    employ a more sophisticated approach.

    Args:
        files: Iterable of uploaded-file objects; only ``name`` is read.
        poll_interval: Seconds to sleep between polls. Defaults to 10, the
            previously hard-coded value.
        timeout: Optional maximum number of seconds to wait for each file.
            ``None`` (the default) waits indefinitely, as before.

    Raises:
        TimeoutError: If ``timeout`` is set and a file is still processing
            once it has elapsed.
        RuntimeError: If a file ends in any state other than "ACTIVE".
    """
    print("Waiting for file processing...")
    for uploaded in files:
        name = uploaded.name
        deadline = None if timeout is None else time.monotonic() + timeout
        current = genai.get_file(name)
        while current.state.name == "PROCESSING":
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(f"File {name} is still processing")
            print(".", end="", flush=True)
            time.sleep(poll_interval)
            current = genai.get_file(name)
        if current.state.name != "ACTIVE":
            # RuntimeError subclasses Exception, so existing
            # `except Exception` handlers keep working.
            raise RuntimeError(f"File {current.name} failed to process")
    print("...all files ready")
    print()
|
47 |
+
|
48 |
+
# Create the model
# See https://ai.google.dev/api/python/google/generativeai/GenerativeModel
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config=generation_config,
    # safety_settings = Adjust safety settings
    # See https://ai.google.dev/gemini-api/docs/safety-settings
    system_instruction="You are an expert economist. Provide concise answers and if you do not know, just say you do not know.",
)

# TODO Make these files available on the local file system
# You may need to update the file paths
files = [
    upload_to_gemini("2024_25_Annex_Budget.pdf", mime_type="application/pdf"),
    upload_to_gemini("2024_25_Budget_Speech.pdf", mime_type="application/pdf"),
]

# Some files have a processing delay. Wait for them to be ready.
wait_for_files_active(files)

chat_session = model.start_chat(history=[])

# The uploaded PDFs were previously never handed to the model, so it could not
# answer from them. Send them in the same user turn as the question.
# NOTE(review): "INSERT_INPUT_HERE" is still the sample placeholder; replace it
# with the real user question.
response = chat_session.send_message([*files, "INSERT_INPUT_HERE"])

print(response.text)
|
83 |
|
84 |
|
85 |
def main():
|
86 |
st.set_page_config(
|
87 |
+
page_title="Budget 2024-2025 Chatbot",
|
88 |
page_icon="🤖"
|
89 |
)
|
90 |
|