elia-waefler committed on
Commit
c030ff5
·
verified ·
1 Parent(s): 020a9c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -252
app.py CHANGED
@@ -1,252 +1,234 @@
1
- """
2
- testing my own vectors
3
-
4
- list comprehension whenever possible
5
- main function
6
- if name == main
7
- reusable functions that do just one specific task
8
- type checking
9
- def my_function(in_one: str, in_two: int) -> None:
10
- pip install mypy for static typechecking.
11
-
12
-
13
- False
14
- None
15
- True
16
- and
17
- as
18
- assert
19
- async
20
- await
21
- in
22
- break
23
- class
24
- continue
25
- de
26
- del
27
- elif
28
- else
29
- except
30
- finally
31
- for
32
- from
33
- global
34
- if
35
- import
36
- Warningsis
37
- lambda
38
- nonlocal
39
- Not
40
- Keyword Args
41
-
42
-
43
- """
44
-
45
- import ingest
46
- import my_2_sim_search
47
- import my_vectors
48
- import setup_db
49
- import my_new_openai
50
- import time
51
- import streamlit as st
52
- import os
53
- from PIL import Image
54
-
55
-
56
- def merge_indices(index1, index2):
57
- """
58
- Merge two indices into a new index, assuming both are of the same type and dimensionality.
59
- """
60
- pass
61
-
62
-
63
- def handle_userinput(user_question):
64
- pass
65
-
66
-
67
- def save_uploaded_file(uploaded_file):
68
- try:
69
- # Create a static folder if it doesn't exist
70
- if not os.path.exists('static'):
71
- os.makedirs('static')
72
-
73
- # Write the uploaded file to a new file in the static directory
74
- with open(os.path.join('static', uploaded_file.name), "wb") as f:
75
- f.write(uploaded_file.getbuffer())
76
- return True
77
- except Exception as e:
78
- print(e)
79
- return False
80
-
81
-
82
- def main():
83
- st.set_page_config(page_title="Anna Seiler Haus KI-Assistent", page_icon=":hospital:")
84
- if True:
85
- if "conversation" not in sst:
86
- sst.conversation = None
87
- if "chat_history" not in sst:
88
- sst.chat_history = None
89
- if "page" not in sst:
90
- sst.page = "home"
91
- if "openai" not in sst:
92
- sst.openai = True
93
- if "login" not in sst:
94
- sst.login = False
95
- if 'submitted_user_query' not in sst:
96
- sst.submitted_user_query = ''
97
- if 'submitted_user_safe' not in sst:
98
- sst.submitted_user_safe = ''
99
- if 'submitted_user_load' not in sst:
100
- sst.submitted_user_load = ''
101
- if 'widget_user_load' not in sst:
102
- sst.widget_user_load = 'U3_alle' # Init the vectorstore
103
- if 'vectorstore' not in sst:
104
- sst.vectorstore = None
105
-
106
- def submit_user_query():
107
- sst.submitted_user_query = sst.widget_user_query
108
- sst.widget_user_query = ''
109
-
110
- def submit_user_safe():
111
- sst.submitted_user_safe = sst.widget_user_safe
112
- sst.widget_user_safe = ''
113
- if sst.vectorstore is not None:
114
- my_vectors.save_local(sst.vectorstore, path=sst.submitted_user_safe)
115
- st.sidebar.success("saved")
116
- else:
117
- st.sidebar.warning("No embeddings to save. Please process documents first.")
118
-
119
- def submit_user_load():
120
- sst.submitted_user_load = sst.widget_user_load
121
- sst.widget_user_load = ''
122
- if os.path.exists(sst.submitted_user_load):
123
- new_db = my_vectors.load_local(f"{sst.submitted_user_load}/faiss_index.index")
124
- if sst.vectorstore is not None:
125
- if new_db is not None: # Check if this is working
126
- st.sidebar.success("Vectors loaded")
127
- else:
128
- if new_db is not None: # Check if this is working
129
- sst.vectorstore = new_db
130
- st.sidebar.success("Vectors loaded")
131
- else:
132
- st.sidebar.warning("Couldn't load/find embeddings")
133
-
134
- st.header("Anna Seiler Haus KI-Assistent ASH :hospital:")
135
- if st.toggle("show README"):
136
-
137
- st.subheader("Funktion: ")
138
- st.write("dieses proof-of-concept von Elia Wäfler demonstriert das Potential von RAG (Retrival Augmented Generation) für BIM2FM Dokumentenablagen am Beispiel Dokumente U3 ASH (Anna Seiler Haus, Inselspital Bern). chatte mit den Dokumenten, oder lade selber ein oder mehrere PDF-Dokumente hoch, um RAG auszuprobieren. die vektoren werden lokal oder im st.session_state gespeichert. Feedback und Bugs gerne an elia.waefler@insel.ch")
139
- st.write("Vielen Dank.")
140
- st.write("")
141
-
142
- st.subheader("Licence and credits")
143
- st.write("THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.")
144
- st.write("special thanks to OpenAI, STREAMLIT, HUGGINGFACE, LANGCHAIN and alejandro-ao")
145
- l, r = st.columns(2)
146
- with l:
147
- st.subheader("Limitationen: ")
148
- st.write("bisher nur Text aus PDFs")
149
- st.write("macht Fehler, kann falsche Informationen geben")
150
- st.write("prompts werden bisher nicht geprüft")
151
- st.write("")
152
- with r:
153
- st.subheader("geplante Erweiterungen:")
154
- st.write("Tabellen, Bilder werden auch vektorisiert, um die retrival qualität zu verbessern")
155
- st.write("on premise anwendung mit mistral 7b oder vergleichbar")
156
- st.write("Ecodomus API einbinden, um alle Dokumente einzubinden.")
157
- st.write("")
158
-
159
- if sst.login:
160
- if st.toggle("RAG / classifier"):
161
- #user_question = st.text_input("Ask a question about your documents:", key="user_query", on_change=handle_query)
162
- st.text_input('Ask a question about your documents:', key='widget_user_query', on_change=submit_user_query)
163
- #sst.openai = st.toggle(label="use openai?")
164
- if sst.submitted_user_query:
165
- if sst.vectorstore is not None:
166
- handle_userinput(sst.submitted_user_query)
167
- sst.submitted_user_query = False
168
- else:
169
- st.warning("no vectorstore loaded.")
170
-
171
- with st.sidebar:
172
- st.subheader("Your documents")
173
- pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
174
- if st.button("Process"):
175
- with st.spinner("Processing"):
176
- vec = ingest.get_text_chunks(ingest.get_pdf_text(pdf_docs))
177
- st.warning("only text")
178
- sst.vectorstore = vec
179
- sst.conversation = vec
180
- st.success("embedding complete")
181
- st.text_input('Safe Embeddings to: (copy path of folder)', key='widget_user_safe',
182
- on_change=submit_user_safe)
183
- st.text_input('Load Embeddings from: (copy path of folder)', key='widget_user_load',
184
- on_change=submit_user_load)
185
- if st.toggle("reset vectorstore?"):
186
- if st.button("Yes, reset"):
187
- sst.vectorstore = None
188
- st.warning("vectorstore reset complete")
189
- else:
190
- st.warning("unsaved embeddings will be lost.")
191
- else:
192
- vec_store = setup_db.load_vectorstore_from_excel("data/KBOB_Klassifizierung.xlsx")
193
- sst.page = "home"
194
- file = st.file_uploader("upload file", accept_multiple_files=False)
195
- if st.button("classify me!"):
196
- with st.spinner("Classifying..."):
197
- query_vecs = []
198
- if file.type == "application/pdf":
199
- one, two, three, four, five = st.columns(5)
200
- text = ingest.get_pdf_text(file)
201
- with one:
202
- st.success("text")
203
- # ONE OR MULTIPLE IS THE QUESTION
204
- images = ingest.get_pdf_images(file.getvalue())
205
- if type(images) != list:
206
- images = [images]
207
- for img in images:
208
- text += my_new_openai.img_to_text(img_base64=my_new_openai.image_bytes_to_base64(img))
209
- with two:
210
- st.success("images")
211
-
212
- tabs = ingest.get_pdf_tables(file.getvalue())
213
-
214
- if type(tabs) != list:
215
- tabs = [tabs]
216
- for tab in tabs:
217
- text += my_new_openai.table_to_text(table=tab)
218
- with three:
219
- st.success("tabs")
220
- full_search = my_new_openai.vectorize_data(text)
221
- detail_search = [my_new_openai.vectorize_data(_) for _ in ingest.get_text_chunks(text)]
222
- with four:
223
- st.success("vecs")
224
- st.write(len(list(vec_store.keys())))
225
- sorted_vec_table = my_2_sim_search.sim_search_fly(vec_table=vec_store, term=full_search)
226
- st.success("sim search")
227
- st.write(f"len of list of categories {len(list(sorted_vec_table.keys()))}")
228
- st.write(f"the most fitting category is {next(iter(sorted_vec_table))}")
229
- for vec in detail_search:
230
- pass
231
- else:
232
- st.error()
233
- else:
234
- user_pw = st.text_input("ASK_ASH_PASSWORD: ", type="password")
235
- if st.button("check"):
236
- time.sleep(0.5)
237
- if user_pw == ASK_ASH_PASSWORD:
238
- sst.login = True
239
- if "first_load" not in sst:
240
- submit_user_load()
241
- sst.first_load = True
242
- st.rerun()
243
-
244
-
245
- if __name__ == '__main__':
246
- if True:
247
- OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
248
- OPENAI_ORG_ID = os.environ["OPENAI_ORG_ID"]
249
- HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]
250
- sst = st.session_state
251
- ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]
252
- main()
 
1
+ """
2
+ testing my own vectors
3
+
4
+ list comprehension whenever possible
5
+ main function
6
+ if name == main
7
+ reusable functions that do just one specific task
8
+ type checking
9
+ def my_function(in_one: str, in_two: int) -> None:
10
+ pip install mypy for static typechecking.
11
+
12
+ O Gebäudebetrieb
13
+ Reinigung
14
+
15
+
16
+ FM Prozesse nicht für klassifizierung
17
+ Phase auch nicht. IMMER 53!!
18
+
19
+ VISION: AUTOMATISCHE BENENNUNG BEI ECODOMUS UPLOAD
20
+ Automatische Metadatenzuordnung
21
+
22
+
23
+
24
+
25
+ """
26
+
27
+ import ingest
28
+ import my_2_sim_search
29
+ import my_vectors
30
+ import setup_db
31
+ import my_new_openai
32
+ import time
33
+ import streamlit as st
34
+ import os
35
+ from PIL import Image
36
+
37
+
38
+ def merge_indices(index1, index2):
39
+ """
40
+ Merge two indices into a new index, assuming both are of the same type and dimensionality.
41
+ """
42
+ pass
43
+
44
+
45
+ def handle_userinput(user_question):
46
+ pass
47
+
48
+
49
+ def save_uploaded_file(uploaded_file):
50
+ try:
51
+ # Create a static folder if it doesn't exist
52
+ if not os.path.exists('static'):
53
+ os.makedirs('static')
54
+
55
+ # Write the uploaded file to a new file in the static directory
56
+ with open(os.path.join('static', uploaded_file.name), "wb") as f:
57
+ f.write(uploaded_file.getbuffer())
58
+ return True
59
+ except Exception as e:
60
+ print(e)
61
+ return False
62
+
63
+
64
+ def main():
65
+ st.set_page_config(page_title="Anna Seiler Haus KI-Assistent", page_icon=":hospital:")
66
+ if True:
67
+ if "conversation" not in sst:
68
+ sst.conversation = None
69
+ if "chat_history" not in sst:
70
+ sst.chat_history = None
71
+ if "page" not in sst:
72
+ sst.page = "home"
73
+ if "openai" not in sst:
74
+ sst.openai = True
75
+ if "login" not in sst:
76
+ sst.login = False
77
+ if 'submitted_user_query' not in sst:
78
+ sst.submitted_user_query = ''
79
+ if 'submitted_user_safe' not in sst:
80
+ sst.submitted_user_safe = ''
81
+ if 'submitted_user_load' not in sst:
82
+ sst.submitted_user_load = ''
83
+ if 'widget_user_load' not in sst:
84
+ sst.widget_user_load = 'U3_alle' # Init the vectorstore
85
+ if 'vectorstore' not in sst:
86
+ sst.vectorstore = None
87
+
88
+ def submit_user_query():
89
+ sst.submitted_user_query = sst.widget_user_query
90
+ sst.widget_user_query = ''
91
+
92
+ def submit_user_safe():
93
+ sst.submitted_user_safe = sst.widget_user_safe
94
+ sst.widget_user_safe = ''
95
+ if sst.vectorstore is not None:
96
+ my_vectors.save_local(sst.vectorstore, path=sst.submitted_user_safe)
97
+ st.sidebar.success("saved")
98
+ else:
99
+ st.sidebar.warning("No embeddings to save. Please process documents first.")
100
+
101
+ def submit_user_load():
102
+ sst.submitted_user_load = sst.widget_user_load
103
+ sst.widget_user_load = ''
104
+ if os.path.exists(sst.submitted_user_load):
105
+ new_db = my_vectors.load_local(f"{sst.submitted_user_load}/faiss_index.index")
106
+ if sst.vectorstore is not None:
107
+ if new_db is not None: # Check if this is working
108
+ st.sidebar.success("Vectors loaded")
109
+ else:
110
+ if new_db is not None: # Check if this is working
111
+ sst.vectorstore = new_db
112
+ st.sidebar.success("Vectors loaded")
113
+ else:
114
+ st.sidebar.warning("Couldn't load/find embeddings")
115
+
116
+ st.header("Anna Seiler Haus KI-Assistent ASH :hospital:")
117
+ if st.toggle("show README"):
118
+
119
+ st.subheader("Funktion: ")
120
+ st.write("dieses proof-of-concept von Elia Wäfler demonstriert das Potential von RAG (Retrival Augmented Generation) für BIM2FM Dokumentenablagen am Beispiel Dokumente U3 ASH (Anna Seiler Haus, Inselspital Bern). chatte mit den Dokumenten, oder lade selber ein oder mehrere PDF-Dokumente hoch, um RAG auszuprobieren. die vektoren werden lokal oder im st.session_state gespeichert. Feedback und Bugs gerne an elia.waefler@insel.ch")
121
+ st.write("Vielen Dank.")
122
+ st.write("")
123
+
124
+ st.subheader("Licence and credits")
125
+ st.write("THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.")
126
+ st.write("special thanks to OpenAI, STREAMLIT, HUGGINGFACE, LANGCHAIN and alejandro-ao")
127
+ l, r = st.columns(2)
128
+ with l:
129
+ st.subheader("Limitationen: ")
130
+ st.write("bisher nur Text aus PDFs")
131
+ st.write("macht Fehler, kann falsche Informationen geben")
132
+ st.write("prompts werden bisher nicht geprüft")
133
+ st.write("")
134
+ with r:
135
+ st.subheader("geplante Erweiterungen:")
136
+ st.write("Tabellen, Bilder werden auch vektorisiert, um die retrival qualität zu verbessern")
137
+ st.write("on premise anwendung mit mistral 7b oder vergleichbar")
138
+ st.write("Ecodomus API einbinden, um alle Dokumente einzubinden.")
139
+ st.write("")
140
+
141
+ if sst.login:
142
+ if st.toggle("RAG / classifier"):
143
+ #user_question = st.text_input("Ask a question about your documents:", key="user_query", on_change=handle_query)
144
+ st.text_input('Ask a question about your documents:', key='widget_user_query', on_change=submit_user_query)
145
+ #sst.openai = st.toggle(label="use openai?")
146
+ if sst.submitted_user_query:
147
+ if sst.vectorstore is not None:
148
+ handle_userinput(sst.submitted_user_query)
149
+ sst.submitted_user_query = False
150
+ else:
151
+ st.warning("no vectorstore loaded.")
152
+
153
+ with st.sidebar:
154
+ st.subheader("Your documents")
155
+ pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
156
+ if st.button("Process"):
157
+ with st.spinner("Processing"):
158
+ vec = ingest.get_text_chunks(ingest.get_pdf_text(pdf_docs))
159
+ st.warning("only text")
160
+ sst.vectorstore = vec
161
+ sst.conversation = vec
162
+ st.success("embedding complete")
163
+ st.text_input('Safe Embeddings to: (copy path of folder)', key='widget_user_safe',
164
+ on_change=submit_user_safe)
165
+ st.text_input('Load Embeddings from: (copy path of folder)', key='widget_user_load',
166
+ on_change=submit_user_load)
167
+ if st.toggle("reset vectorstore?"):
168
+ if st.button("Yes, reset"):
169
+ sst.vectorstore = None
170
+ st.warning("vectorstore reset complete")
171
+ else:
172
+ st.warning("unsaved embeddings will be lost.")
173
+ else:
174
+ vec_store = setup_db.load_vectorstore_from_excel("data/KBOB_Klassifizierung.xlsx")
175
+ sst.page = "home"
176
+ file = st.file_uploader("upload file", accept_multiple_files=False)
177
+ if st.button("classify me!"):
178
+ with st.spinner("Classifying..."):
179
+ query_vecs = []
180
+ if file.type == "application/pdf":
181
+ one, two, three, four, five = st.columns(5)
182
+ text = ingest.get_pdf_text(file)
183
+ with one:
184
+ st.success("text")
185
+ # ONE OR MULTIPLE IS THE QUESTION
186
+ images = ingest.get_pdf_images(file.getvalue())
187
+ if type(images) != list:
188
+ images = [images]
189
+ for img in images:
190
+ text += my_new_openai.img_to_text(img_base64=my_new_openai.image_bytes_to_base64(img))
191
+ with two:
192
+ st.success("images")
193
+
194
+ tabs = ingest.get_pdf_tables(file.getvalue())
195
+
196
+ if type(tabs) != list:
197
+ tabs = [tabs]
198
+ for tab in tabs:
199
+ text += my_new_openai.table_to_text(table=tab)
200
+ with three:
201
+ st.success("tabs")
202
+ full_search = my_new_openai.vectorize_data(text)
203
+ detail_search = [my_new_openai.vectorize_data(_) for _ in ingest.get_text_chunks(text)]
204
+ with four:
205
+ st.success("vecs")
206
+ st.write(len(list(vec_store.keys())))
207
+ sorted_vec_table = my_2_sim_search.sim_search_fly(vec_table=vec_store, term=full_search)
208
+ st.success("sim search")
209
+ st.write(f"len of list of categories {len(list(sorted_vec_table.keys()))}")
210
+ st.write(f"the most fitting category is {next(iter(sorted_vec_table))}")
211
+ for vec in detail_search:
212
+ pass
213
+ else:
214
+ st.error()
215
+ else:
216
+ user_pw = st.text_input("ASK_ASH_PASSWORD: ", type="password")
217
+ if st.button("check"):
218
+ time.sleep(0.5)
219
+ if user_pw == ASK_ASH_PASSWORD:
220
+ sst.login = True
221
+ if "first_load" not in sst:
222
+ submit_user_load()
223
+ sst.first_load = True
224
+ st.rerun()
225
+
226
+
227
+ if __name__ == '__main__':
228
+ if True:
229
+ OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
230
+ OPENAI_ORG_ID = os.environ["OPENAI_ORG_ID"]
231
+ HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]
232
+ sst = st.session_state
233
+ ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]
234
+ main()