Lucas committed on
Commit
d9c152b
·
1 Parent(s): bd2c837

Add main application file

Browse files
Files changed (1) hide show
  1. main.py +148 -0
main.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.llms import OpenAI
3
+ from langchain.chains import RetrievalQA
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import OpenAIEmbeddings
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.document_loaders import PyPDFLoader
8
+ import panel as pn
9
+ import PyPDF2
10
+ import tempfile
11
+
12
# --- Panel setup -----------------------------------------------------------
# Enable toast notifications, then load the text-editor extension and select
# the Bootstrap template with full-width sizing.
pn.extension(notifications=True)
pn.extension("texteditor", sizing_mode="stretch_width", template="bootstrap")
pn.state.template.param.update(
    header_background="#F08080",
    main_max_width="690px",
)

# --- Input widgets ---------------------------------------------------------
# PDF upload control.
file_input = pn.widgets.FileInput(width=300)

# OpenAI API key; a password field so the key is masked while typing.
openaikey = pn.widgets.PasswordInput(
    placeholder="Enter your OpenAI API Key here...",
    value="",
    width=300,
)

# Free-text question box (toolbar hidden).
prompt = pn.widgets.TextEditor(
    placeholder="Enter your questions here...",
    value="",
    toolbar=False,
    height=160,
)
run_button = pn.widgets.Button(name="Run!")

# --- Advanced settings -----------------------------------------------------
# `k` passed to the retriever's search_kwargs.
select_k = pn.widgets.IntSlider(
    name="Number of relevant chunks",
    value=2,
    start=1,
    end=5,
    step=1,
)
# `chain_type` passed to RetrievalQA.from_chain_type.
select_chain_type = pn.widgets.RadioButtonGroup(
    name="Chain type",
    options=["stuff", "map_reduce", "refine", "map_rerank"],
)

# Prompt + run button on the left, advanced settings card on the right.
widgets = pn.Row(
    pn.Column(prompt, run_button, margin=5),
    pn.Card(
        "Chain type:",
        pn.Column(select_chain_type, select_k),
        title="Advanced settings",
        margin=10,
    ),
    width=600,
)
44
+
45
+
46
def is_valid_pdf(file_path):
    """Return whether ``file_path`` can be opened and parsed by PyPDF2.

    Args:
        file_path: Path of the file to check.

    Returns:
        True if the file opens and ``PyPDF2.PdfReader`` accepts it,
        False if opening or parsing raises an ordinary exception.
    """
    try:
        with open(file_path, 'rb') as f:
            PyPDF2.PdfReader(f)
    except Exception:
        # Any regular failure (missing file, unreadable/malformed PDF) means
        # "not valid". Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate instead of being reported as a bad PDF.
        return False
    return True
53
+
54
+
55
def qa(file, query, chain_type, k):
    """Answer ``query`` using the contents of the PDF at ``file``.

    Args:
        file: Path to the PDF document.
        query: The question to ask.
        chain_type: Chain type handed to ``RetrievalQA.from_chain_type``.
        k: Number of chunks the retriever is asked for (``search_kwargs``).

    Returns:
        ``{'error': 'Invalid PDF file.'}`` when the file fails
        ``is_valid_pdf``; otherwise the chain's result mapping, from which
        callers read ``'result'`` and ``'source_documents'``.
    """
    # Reject anything PyPDF2 cannot read before doing any work.
    if not is_valid_pdf(file):
        return {'error': 'Invalid PDF file.'}

    # Load the document and cut it into chunks.
    pages = PyPDFLoader(file).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(pages)

    # Embed the chunks into a Chroma vector store used as the index.
    store = Chroma.from_documents(chunks, OpenAIEmbeddings())

    # Expose the index through a similarity-search retriever.
    retriever = store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    # Build the question-answering chain and run the query through it.
    chain = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
    )
    answer = chain({"query": query})
    print(answer['result'])
    return answer
82
+
83
+
84
convos = []  # store all panel objects in a list


def _conversation_view():
    """Build the column that displays everything accumulated in ``convos``."""
    return pn.Column(*convos, margin=15, width=575, min_height=400)


def qa_result(_):
    """Run the current question against the uploaded PDF and render the chat.

    Checks, in order, that an API key, an uploaded file and a prompt are
    present; each missing item raises an error notification and leaves the
    conversation unchanged. Otherwise the upload is written to a temporary
    file, passed to ``qa``, and the question/answer pair (plus the source
    passages) is appended to ``convos``.

    Args:
        _: Value supplied by the bound widget; unused.

    Returns:
        A ``pn.Column`` containing every item in ``convos``.
    """
    api_key = openaikey.value
    if not api_key:
        pn.state.notifications.error('Missing API key.', duration=2000)
        return _conversation_view()
    # Only export the key once we know it is non-empty.
    os.environ["OPENAI_API_KEY"] = api_key

    if file_input.value is None:
        pn.state.notifications.error('Missing file.', duration=2000)
        return _conversation_view()

    prompt_text = prompt.value
    if not prompt_text:
        pn.state.notifications.error('Missing prompt.', duration=2000)
        return _conversation_view()

    # Save the upload to a unique temporary file instead of a fixed
    # "temp.pdf" in the working directory, so separate runs cannot overwrite
    # each other's document and nothing is left behind afterwards.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        pdf_path = tmp.name
    try:
        file_input.save(pdf_path)
        result = qa(
            file=pdf_path,
            query=prompt_text,
            chain_type=select_chain_type.value,
            k=select_k.value,
        )
    finally:
        try:
            os.remove(pdf_path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not hide the
            # answer (or the real error) from the user.
            pass

    error = result.get('error')
    if error is not None:
        pn.state.notifications.error(error, duration=2000)
        return _conversation_view()

    separator = '\n--------------------------------------------------------------------\n'
    sources = separator.join(
        doc.page_content for doc in result["source_documents"]
    )
    convos.extend([
        # The user's question.
        pn.Row(
            pn.panel("\U0001F60A", width=10),
            prompt_text,
            width=600,
        ),
        # The model's answer followed by the passages it was given.
        pn.Row(
            pn.panel("\U0001F916", width=10),
            pn.Column(
                result["result"],
                "Relevant source text:",
                pn.pane.Markdown(sources),
            ),
        ),
    ])
    return _conversation_view()
126
+
127
+
128
# Bind qa_result to the run button and show a loading indicator on its output.
qa_interactive = pn.panel(pn.bind(qa_result, run_button), loading_indicator=True)

# Scrollable box that holds the rendered conversation.
output = pn.WidgetBox(
    '*Output will show up here:*',
    qa_interactive,
    scroll=True,
    width=630,
)

# Page layout: intro text, upload + key inputs, conversation, then controls.
pn.Column(
    pn.pane.Markdown("""
## \U0001F60A! Question Answering with your PDF file
(original implementation: @sophiamyang)

1) Upload a PDF. 2) Enter OpenAI API key. 3) Type a question and click "Run".

"""),
    pn.Row(file_input, openaikey),
    output,
    widgets,
).servable()