# RagModels / chatbot.py
# Author: hunterXdk
# Commit 50e4be7 (verified): "Initial Commit With ❤" -- 2.88 kB
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open
            (``main`` passes the list returned by the Streamlit uploader).
            ``None`` or an empty iterable yields an empty string.

    Returns:
        One string holding the text of all pages, in document order and
        then page order.
    """
    if not pdf_docs:
        return ""

    parts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Pages without a text layer (e.g. scanned images) may produce an
            # empty or None result depending on the PDF library version;
            # normalise to "" so the join below never sees a non-string.
            parts.append(page.extract_text() or "")
    # Single join instead of repeated ``+=`` on a growing string.
    return "".join(parts)
# chunk_size = 1000, chunk_overlap = 200 (for shorter PDFs)
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split *text* into overlapping chunks suitable for embedding.

    Args:
        text: The full document text to split.
        chunk_size: Upper bound on the size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000; a smaller
            value such as 1000 suits shorter PDFs.
        chunk_overlap: Amount of text shared between consecutive chunks,
            forwarded to the splitter. Defaults to 1000 (e.g. 200 when
            ``chunk_size`` is 1000).

    Returns:
        The list of chunks produced by the splitter's ``split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
# Embed the text chunks into a FAISS vector store and save it locally for later queries.
def get_vector_store(text_chunks):
    """Embed *text_chunks* and save them locally as a FAISS index.

    The index is saved under the name ``faiss_index``, which is the same
    name ``user_input`` loads from.

    Args:
        text_chunks: Non-empty list of text chunks to embed.

    Returns:
        None. The index is persisted as a side effect.

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # Nothing to embed: fail here with a clear message rather than somewhere
    # inside the embedding / index-building libraries.
    if not text_chunks:
        raise ValueError("No text chunks to index: the documents contained no extractable text.")

    embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
    vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
def get_conversation_chain():
    """Build the "stuff" question-answering chain used to answer questions.

    The prompt declares two input variables: ``context`` and ``question``.
    ``question`` must match the key that ``user_input`` passes when it calls
    the chain; a differently named placeholder leaves the chain with a
    missing input.

    Returns:
        The chain object returned by ``load_qa_chain``.
    """
    # The continuation lines of the template are deliberately unindented so
    # that no extra leading whitespace ends up in the prompt text.
    prompt_template = """Answer the query as detailed as possible from the provided context, make sure to provide all the details, if answeris not in
the provided context, just say, "Answer is not available in the provided documents", don't provide the wrong answer:\n {context}? \n Query: {question}? \n
Answer:
"""
    # NOTE(review): confirm that the "gemini-pro" model id is still served by
    # the API in the target deployment.
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)
def user_input(user_question):
    """Answer *user_question* from the saved FAISS index and show the reply.

    Loads the index saved under ``faiss_index``, retrieves the chunks most
    similar to the question, runs them through the QA chain and writes the
    chain's ``output_text`` to the Streamlit page. If no index has been
    saved yet, a warning is shown instead and nothing else happens.

    Args:
        user_question: The question typed by the user.

    Returns:
        None. Output goes to the Streamlit page (and a debug print).
    """
    import os  # local import: only needed for the existence check below

    index_path = "faiss_index"
    # A question may arrive before any document has been processed; loading
    # a missing index would fail, so tell the user what to do instead.
    if not os.path.exists(index_path):
        st.warning("No processed documents found. Upload your PDFs and click 'Submit & Process' first.")
        return

    embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
    # SECURITY: allow_dangerous_deserialization=True lets the loader unpickle
    # data stored with the index. Only acceptable while the index is one this
    # application wrote itself; never load an index from an untrusted source.
    new_db = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversation_chain()
    # NOTE(review): the "question" key must match the question variable name
    # declared in the chain's prompt template.
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    print(response)  # debug trace of the raw chain output on the console
    st.write("Reply: ", response["output_text"])
# Streamlit front end: page layout, question box and PDF upload sidebar.
def main():
    """Render the Streamlit page and wire up its two interactions.

    The main area holds a question box; any non-empty question is passed to
    ``user_input``. The sidebar holds the PDF uploader and a button that
    extracts the text, splits it into chunks and builds the vector store.
    """
    st.set_page_config(page_title="PDF Chatbot")
    st.header("PDF Chatbot made with ❤")

    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: do not run the pipeline on empty input.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Ruko Padh raha hu..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    if text_chunks:
                        get_vector_store(text_chunks)
                        st.success("Saare documents padh liya. Ab swaal pucho 😤")
                    else:
                        # e.g. scanned PDFs with no text layer: there is
                        # nothing to index, so skip building the store.
                        st.warning("No readable text was found in the uploaded PDFs.")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()