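"""Gradio app for PDF question answering with retrieval-augmented generation (RAG).

Each page of an uploaded PDF becomes a LangChain Document, embedded with OpenAI
embeddings into an in-memory Chroma vector store; the top matches are passed as
context to the chat model, which answers the user's question.
"""
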
import gradio as gr
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from PyPDF2 import PdfReader
import os
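
# Assumed runtime dependencies (not pinned in the source): gradio, PyPDF2,
# chromadb, langchain-core, langchain-chroma, langchain-openai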

# Process the uploaded PDF and convert each page into a LangChain Document
def pdf_to_documents(pdf_file):
    # Gradio may pass a tempfile-like object or a plain path, depending on the version
    path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
    reader = PdfReader(path)
    # extract_text() can return None for pages with no extractable text
    pages = [page.extract_text() or "" for page in reader.pages]
    documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
    return documents

# Initialize an in-memory Chroma vector store over the documents
def initialize_vectorstore(documents, api_key):
    os.environ["OPENAI_API_KEY"] = api_key  # expose the key to the OpenAI clients below
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(documents, embedding=embeddings)
    return vectorstore

# RAG retrieval and LLM chain: build the pipeline over the uploaded PDF and run it
def rag_from_pdf(question, pdf_file, api_key):
    documents = pdf_to_documents(pdf_file)
    vectorstore = initialize_vectorstore(documents, api_key)
    # Retrieve the top 2 most relevant pages
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})
    # Initialize the LLM
    llm = ChatOpenAI(model="gpt-3.5-turbo")
    # Prompt template combining the retrieved context and the question
    prompt_template = """
Answer this question using the provided context only.
{question}
Context:
{context}
"""
    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])

    # Join the retrieved Documents into one context string so the prompt sees
    # plain text rather than the Document objects' repr
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    # RAG chain: retrieve and format context, fill the prompt, call the LLM
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
    )
    response = rag_chain.invoke(question)
    return response.content
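
# Note: rag_from_pdf re-embeds the whole PDF on every question, which keeps the demo
# stateless but repeats the embedding cost; a persisted Chroma collection would avoid that.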

# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## PDF-based Question Answering with RAG")
    # Input for the OpenAI API key
    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
    # File upload for the PDF
    pdf_file_input = gr.File(label="Upload your PDF document")
    # Question input
    question_input = gr.Textbox(label="Ask a question related to the PDF")
    # Output for the RAG response
    rag_output = gr.Textbox(label="Generated Response", lines=10)
    # Button to run the RAG chain
    rag_button = gr.Button("Ask Question")
    # Wire the button to the RAG function
    rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)

# Launch the Gradio app
app.launch()
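
# Usage sketch (assumed workflow): run this script, open the printed local URL,
# paste an OpenAI API key, upload a PDF, type a question, and click "Ask Question".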