Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import tempfile

import gradio as gr
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
11 |
+
|
12 |
+
# API key for the Google Generative AI services.
# Prefer the GOOGLE_API_KEY environment variable so the key is never
# committed to source control; fall back to the original edit-in-place
# placeholder so existing behavior is preserved when the variable is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
|
14 |
+
|
15 |
+
def process_pdf_and_question(pdf_file, question):
    """Answer a question about an uploaded PDF via retrieval-augmented Gemini.

    The PDF is split into overlapping chunks, embedded into an in-memory
    Chroma store, and the top-matching chunks are stuffed into a prompt for
    the LLM to answer from.

    Args:
        pdf_file: The uploaded PDF. May be raw ``bytes`` or a path / file
            wrapper with a ``.name`` attribute — gradio's File component
            delivers either depending on version/config (TODO confirm against
            the installed gradio).
        question: The user's question about the document.

    Returns:
        The model's answer string.

    Raises:
        Whatever the underlying langchain / Google API calls raise (network
        errors, auth errors, PDF parse errors).
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=GOOGLE_API_KEY
    )

    # Normalize the upload to raw bytes: gr.File may hand us bytes directly
    # (type="binary") or a temp-file path / NamedString wrapper.
    if isinstance(pdf_file, (bytes, bytearray)):
        pdf_bytes = bytes(pdf_file)
    else:
        source_path = getattr(pdf_file, "name", pdf_file)
        with open(source_path, "rb") as src:
            pdf_bytes = src.read()

    # PyPDFLoader needs a real file path; use a unique temp file so
    # concurrent requests cannot clobber each other.
    fd, temp_pdf_path = tempfile.mkstemp(suffix=".pdf")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(pdf_bytes)

        # Split on sentence boundaries into ~500-char chunks with 50-char
        # overlap so answers spanning a boundary are still retrievable.
        loader = PyPDFLoader(temp_pdf_path)
        text_splitter = CharacterTextSplitter(
            separator=".",
            chunk_size=500,
            chunk_overlap=50,
            length_function=len,
            is_separator_regex=False,
        )
        pages = loader.load_and_split(text_splitter)

        # Embed the chunks and index them in an ephemeral Chroma store.
        vectordb = Chroma.from_documents(pages, embeddings)

        # Retrieve the 10 most similar chunks per query.
        retriever = vectordb.as_retriever(search_kwargs={"k": 10})

        # Stuff the retrieved context and the question into a single prompt.
        template = """You are a helpful AI assistant. Answer based on the context provided.
context: {context}
input: {input}
answer:"""
        prompt = PromptTemplate.from_template(template)
        combine_docs_chain = create_stuff_documents_chain(llm, prompt)
        retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

        response = retrieval_chain.invoke({"input": question})
        return response["answer"]
    finally:
        # Always remove the temp file, even when the chain raises
        # (the original only cleaned up on the success path).
        os.remove(temp_pdf_path)
|
58 |
+
|
59 |
+
# Gradio UI: a PDF upload plus a question textbox in, the answer text out.
# (Labels are intentionally in Traditional Chinese for the target audience.)
iface = gr.Interface(
    fn=process_pdf_and_question,
    inputs=[
        gr.File(label="上傳PDF手冊"),
        gr.Textbox(label="輸入您的問題"),
    ],
    outputs=gr.Textbox(label="回答"),
    title="PDF問答系統",
    description="上傳PDF手冊並提出問題,AI將根據手冊內容回答您的問題。",
)

# Launch the web server only when run as a script, so importing this
# module (e.g. for testing) does not start the server as a side effect.
if __name__ == "__main__":
    iface.launch()
|