"""Streamlit RAG chatbot over the “Leave No Context Behind” paper,
built with LangChain, Gemini, and Chroma."""

import io
import os

import fitz  # PyMuPDF
import nltk
import streamlit as st
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.messages import SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_text_splitters import NLTKTextSplitter

# NLTKTextSplitter relies on NLTK's sentence tokenizer.
nltk.download("punkt")

st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")

# The Gemini chat and embedding clients read GOOGLE_API_KEY from the
# environment; warn early instead of assigning None into os.environ.
if not os.getenv("GOOGLE_API_KEY"):
    st.error("GOOGLE_API_KEY is not set; Gemini chat and embedding calls will fail.")

# Prompt template: the system message fixes the bot's role, and the human
# message carries the retrieved context plus the user's question.
chat_template = ChatPromptTemplate.from_messages([
    SystemMessage(content="""You are a helpful AI bot.
You take the context and question from the user. Your answer should be based on the specific context."""),
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
Context:
{context}

Question:
{question}

Answer:"""),
])

# Initialize the chat model and output parser.
chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
output_parser = StrOutputParser()

# Bare chain (prompt -> model -> parser); the RAG chain below adds retrieval.
chain = chat_template | chat_model | output_parser
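# Optional smoke test for the bare chain above (no retrieval). It is guarded
# by an environment variable so it never runs inside the deployed app; the
# variable name DEBUG_RAG_CHAIN and the sample strings are illustrative
# additions, not part of the original app.
if os.getenv("DEBUG_RAG_CHAIN"):
    sample = chain.invoke({
        "context": "Infini-attention lets a Transformer process unboundedly long inputs.",
        "question": "What does Infini-attention enable?",
    })
    print(sample)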
def extract_text_from_pdf(pdf_file):
    """Extract the full text of a PDF from an in-memory stream using PyMuPDF."""
    document = fitz.open(stream=pdf_file, filetype="pdf")
    text = ""
    for page_num in range(len(document)):
        page = document.load_page(page_num)
        text += page.get_text()
    return text


# Streamlit file uploader.
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Extract text from the uploaded PDF.
    pdf_file = io.BytesIO(uploaded_file.read())
    text = extract_text_from_pdf(pdf_file)

    # Split the document into sentence-aligned chunks. split_documents expects
    # Document objects, not raw strings, so wrap the text first.
    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents([Document(page_content=text)])

    # Embed the chunks and persist them in a Chroma vector store. On newer
    # chromadb versions persistence is automatic and persist() is a no-op.
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db")
    db.persist()

    # Retrieve the 5 most similar chunks for each question.
    retriever = db.as_retriever(search_kwargs={"k": 5})

    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    # RAG chain: the question goes to the retriever (whose results are
    # formatted into the context) and, unchanged, into the question slot.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_template
        | chat_model
        | output_parser
    )

    user_input = st.text_area("Ask Questions to AI")
    if st.button("Submit"):
        st.subheader(":green[Query:]")
        st.subheader(user_input)
        # The chain's first step fans the raw question string out to both the
        # retriever and the passthrough, so invoke with the string itself;
        # passing a dict here would be forwarded to the retriever and fail.
        response = rag_chain.invoke(user_input)
        st.subheader(":green[Response:]")
        st.write(response)
else:
    st.write("Please upload a PDF file to get started.")
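# Optional improvement (a sketch, not wired into the app above): Streamlit
# re-executes this whole script on every interaction, so the Chroma index is
# rebuilt each time the user clicks "Submit". Wrapping the expensive steps in
# st.cache_resource, keyed on the uploaded file's bytes, builds the index once
# per distinct upload. The helper name build_retriever is our own.
@st.cache_resource(show_spinner="Indexing PDF...")
def build_retriever(pdf_bytes: bytes):
    """Build a retriever over the PDF once per distinct upload."""
    pdf_text = extract_text_from_pdf(io.BytesIO(pdf_bytes))
    splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    split_docs = splitter.split_documents([Document(page_content=pdf_text)])
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    store = Chroma.from_documents(split_docs, embeddings, persist_directory="./chroma_db")
    return store.as_retriever(search_kwargs={"k": 5})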