sidcww commited on
Commit
2c46a3b
1 Parent(s): 8468e96

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import gradio as gr
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# API key for the Google Generative AI services. Prefer the GOOGLE_API_KEY
# environment variable (e.g. a Hugging Face Space secret) over a key
# committed in source; the placeholder string remains the fallback so the
# original edit-in-place workflow still works.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
def process_pdf_and_question(pdf_file, question):
    """Answer a question about an uploaded PDF via a Gemini RAG pipeline.

    The PDF is split into chunks, embedded into an in-memory Chroma store,
    and the top matching chunks are stuffed into a prompt for the LLM.

    Args:
        pdf_file: Raw bytes of the uploaded PDF (written directly to disk).
            NOTE(review): assumes the Gradio File component delivers bytes —
            confirm against the Gradio version in use.
        question: Natural-language question to answer from the PDF content.

    Returns:
        The answer string produced by the retrieval chain.
    """
    # Build the LLM and embedding clients with the configured API key.
    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=GOOGLE_API_KEY
    )

    # Write the upload to a unique temporary file: a fixed name such as
    # "temp_handbook.pdf" races when two requests run concurrently.
    fd, temp_pdf_path = tempfile.mkstemp(suffix=".pdf")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(pdf_file)

        # Load the PDF and split it into ~500-char, sentence-ish chunks.
        loader = PyPDFLoader(temp_pdf_path)
        text_splitter = CharacterTextSplitter(
            separator=".",
            chunk_size=500,
            chunk_overlap=50,
            length_function=len,
            is_separator_regex=False,
        )
        pages = loader.load_and_split(text_splitter)

        # Embed the chunks into Chroma and retrieve the 10 best matches
        # per query.
        vectordb = Chroma.from_documents(pages, embeddings)
        retriever = vectordb.as_retriever(search_kwargs={"k": 10})

        # Stuff the retrieved context into the prompt and run the chain.
        template = """You are a helpful AI assistant. Answer based on the context provided.
context: {context}
input: {input}
answer:"""
        prompt = PromptTemplate.from_template(template)
        combine_docs_chain = create_stuff_documents_chain(llm, prompt)
        retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

        response = retrieval_chain.invoke({"input": question})
        return response["answer"]
    finally:
        # Remove the temp file even when the chain raises — the original
        # only cleaned up on the success path, leaking the file on errors.
        os.remove(temp_pdf_path)
# Wire the RAG pipeline into a simple Gradio UI: a PDF upload plus a
# free-text question box in, a single answer box out.
pdf_input = gr.File(label="上傳PDF手冊")
question_input = gr.Textbox(label="輸入您的問題")
answer_output = gr.Textbox(label="回答")

iface = gr.Interface(
    fn=process_pdf_and_question,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="PDF問答系統",
    description="上傳PDF手冊並提出問題,AI將根據手冊內容回答您的問題。"
)

# Start the web server.
iface.launch()