# Spaces: Sleeping  (page-header text captured along with the source; not part of the program)
import os
import uuid

import gradio as gr
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from PyPDF2 import PdfReader
# Function to process the uploaded PDF and convert it to documents
def pdf_to_documents(pdf_file):
    """Read a PDF and return one ``Document`` per page that contains text.

    Args:
        pdf_file: The uploaded PDF. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object whose ``.name`` attribute holds
            the path (the only form the previous implementation accepted).

    Returns:
        A list of ``Document`` objects. ``page_content`` is the text
        extracted from the page and ``metadata["page_number"]`` is the
        page's 1-based position in the PDF. Pages that yield no text are
        skipped, so page numbers may have gaps.

    Raises:
        ValueError: If ``pdf_file`` is ``None`` (nothing was uploaded).
    """
    if pdf_file is None:
        raise ValueError("No PDF file was uploaded.")

    # Path-like inputs are used directly; ``Path.name`` would only be the
    # basename, so the ``.name`` lookup is reserved for file-wrapper objects.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    reader = PdfReader(pdf_path)
    documents = []
    for page_number, page in enumerate(reader.pages, start=1):
        # extract_text() may produce nothing (e.g. image-only pages);
        # an empty Document is useless for retrieval, so drop it.
        text = page.extract_text() or ""
        if not text.strip():
            continue
        documents.append(
            Document(page_content=text, metadata={"page_number": page_number})
        )
    return documents
# Initialize vector store
def initialize_vectorstore(documents, api_key):
    """Embed ``documents`` with OpenAI and index them in a new Chroma collection.

    Args:
        documents: Non-empty list of ``Document`` objects to index.
        api_key: OpenAI API key used for the embedding requests.

    Returns:
        The ``Chroma`` vector store holding the embedded documents.

    Raises:
        ValueError: If ``documents`` is empty or ``api_key`` is blank.
    """
    if not documents:
        raise ValueError("No documents to index.")
    if not api_key or not api_key.strip():
        raise ValueError("An OpenAI API key is required.")
    api_key = api_key.strip()

    # Kept for backward compatibility: code that builds OpenAI clients after
    # this call without an explicit key reads it from the environment.
    os.environ["OPENAI_API_KEY"] = api_key

    # Pass the key explicitly so the embeddings client does not depend on
    # process-global state.
    embeddings = OpenAIEmbeddings(api_key=api_key)

    # Use a unique collection per call. Without a name, from_documents falls
    # back to a fixed default collection, so repeated calls in one process
    # could mix pages from previously indexed PDFs into the results.
    collection_name = f"pdf-{uuid.uuid4().hex}"
    vectorstore = Chroma.from_documents(
        documents,
        embedding=embeddings,
        collection_name=collection_name,
    )
    return vectorstore
# Prompt combining the user's question with the retrieved page text.
_RAG_PROMPT_TEMPLATE = (
    "Answer this question using the provided context only.\n"
    "\n"
    "{question}\n"
    "\n"
    "Context:\n"
    "{context}\n"
)


def _format_context(docs):
    """Render retrieved documents as plain text, one labelled section per page."""
    return "\n\n".join(
        f"[Page {doc.metadata.get('page_number', '?')}]\n{doc.page_content}"
        for doc in docs
    )


# RAG retrieval and LLM chain
def rag_from_pdf(question, pdf_file, api_key):
    """Answer ``question`` using text retrieved from the uploaded PDF.

    The PDF is split into per-page documents, embedded into a vector store,
    and the two most similar pages are passed to ``gpt-3.5-turbo`` together
    with the question.

    Args:
        question: The user's question about the PDF.
        pdf_file: The uploaded PDF, as accepted by ``pdf_to_documents``.
        api_key: OpenAI API key used for both embeddings and the chat model.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If ``question`` is empty or only whitespace.
    """
    if not question or not question.strip():
        raise ValueError("Please enter a question.")

    documents = pdf_to_documents(pdf_file)
    vectorstore = initialize_vectorstore(documents, api_key)
    try:
        # Retrieve top 2 relevant sections
        retriever = vectorstore.as_retriever(
            search_type="similarity", search_kwargs={"k": 2}
        )

        # Pass the key explicitly instead of relying on an environment
        # variable set as a side effect elsewhere.
        llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=api_key)

        prompt = ChatPromptTemplate.from_messages([("human", _RAG_PROMPT_TEMPLATE)])

        # Format the retrieved documents as text before they reach the
        # prompt; otherwise the raw list of Document objects (repr and all)
        # is interpolated into {context}.
        rag_chain = (
            {
                "context": retriever | _format_context,
                "question": RunnablePassthrough(),
            }
            | prompt
            | llm
        )

        response = rag_chain.invoke(question)
        return response.content
    finally:
        # The index is only needed for this one question; drop it so pages
        # from this PDF do not linger in memory or leak into later requests.
        vectorstore.delete_collection()
# Gradio interface: collects an API key, a PDF and a question, and shows the
# answer produced by rag_from_pdf.
with gr.Blocks() as app:
    gr.Markdown("## PDF-based Question Answering with RAG")

    # Input for OpenAI API Key (masked in the UI)
    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")

    # File upload for the PDF; restricted to .pdf so other file types are
    # rejected at upload time instead of failing later in the PDF parser.
    pdf_file_input = gr.File(label="Upload your PDF document", file_types=[".pdf"])

    # Question input
    question_input = gr.Textbox(label="Ask a question related to the PDF")

    # Output for the RAG response
    rag_output = gr.Textbox(label="Generated Response", lines=10)

    # Button to run RAG chain
    rag_button = gr.Button("Ask Question")

    # Input order must match rag_from_pdf(question, pdf_file, api_key).
    rag_button.click(
        rag_from_pdf,
        inputs=[question_input, pdf_file_input, api_key_input],
        outputs=rag_output,
    )

# Launch Gradio app only when run as a script, so importing this module
# (e.g. for tests or reuse of the helpers) does not start a server.
if __name__ == "__main__":
    app.launch()