Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import tempfile

import gradio as gr
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
11 |
+
|
12 |
+
# API key for the Google Generative AI services.
# Prefer the GOOGLE_API_KEY environment variable so the key is never
# committed to source control; fall back to the original edit-in-place
# placeholder so existing behavior is preserved when the variable is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
|
14 |
+
|
15 |
+
def process_pdf_and_question(pdf_file, question):
    """Answer a question about an uploaded PDF via retrieval-augmented Gemini.

    The PDF is split into overlapping chunks, embedded into an in-memory
    Chroma store, and the top-matching chunks are stuffed into a prompt for
    the LLM to answer from.

    Args:
        pdf_file: The uploaded PDF. May be raw ``bytes`` or a path / file
            wrapper with a ``.name`` attribute — gradio's File component
            delivers either depending on version/config (TODO confirm against
            the installed gradio).
        question: The user's question about the document.

    Returns:
        The model's answer string.

    Raises:
        Whatever the underlying langchain / Google API calls raise (network
        errors, auth errors, PDF parse errors).
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=GOOGLE_API_KEY
    )

    # Normalize the upload to raw bytes: gr.File may hand us bytes directly
    # (type="binary") or a temp-file path / NamedString wrapper.
    if isinstance(pdf_file, (bytes, bytearray)):
        pdf_bytes = bytes(pdf_file)
    else:
        source_path = getattr(pdf_file, "name", pdf_file)
        with open(source_path, "rb") as src:
            pdf_bytes = src.read()

    # PyPDFLoader needs a real file path; use a unique temp file so
    # concurrent requests cannot clobber each other.
    fd, temp_pdf_path = tempfile.mkstemp(suffix=".pdf")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(pdf_bytes)

        # Split on sentence boundaries into ~500-char chunks with 50-char
        # overlap so answers spanning a boundary are still retrievable.
        loader = PyPDFLoader(temp_pdf_path)
        text_splitter = CharacterTextSplitter(
            separator=".",
            chunk_size=500,
            chunk_overlap=50,
            length_function=len,
            is_separator_regex=False,
        )
        pages = loader.load_and_split(text_splitter)

        # Embed the chunks and index them in an ephemeral Chroma store.
        vectordb = Chroma.from_documents(pages, embeddings)

        # Retrieve the 10 most similar chunks per query.
        retriever = vectordb.as_retriever(search_kwargs={"k": 10})

        # Stuff the retrieved context and the question into a single prompt.
        template = """You are a helpful AI assistant. Answer based on the context provided.
context: {context}
input: {input}
answer:"""
        prompt = PromptTemplate.from_template(template)
        combine_docs_chain = create_stuff_documents_chain(llm, prompt)
        retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

        response = retrieval_chain.invoke({"input": question})
        return response["answer"]
    finally:
        # Always remove the temp file, even when the chain raises
        # (the original only cleaned up on the success path).
        os.remove(temp_pdf_path)
|
58 |
+
|
59 |
+
# Gradio UI: a PDF upload plus a question textbox in, the answer text out.
# (Labels are intentionally in Traditional Chinese for the target audience.)
iface = gr.Interface(
    fn=process_pdf_and_question,
    inputs=[
        gr.File(label="上傳PDF手冊"),
        gr.Textbox(label="輸入您的問題"),
    ],
    outputs=gr.Textbox(label="回答"),
    title="PDF問答系統",
    description="上傳PDF手冊並提出問題,AI將根據手冊內容回答您的問題。",
)

# Launch the web server only when run as a script, so importing this
# module (e.g. for testing) does not start the server as a side effect.
if __name__ == "__main__":
    iface.launch()
|