Spaces:
Runtime error
Runtime error
elia-waefler
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,252 +1,234 @@
|
|
1 |
-
"""
|
2 |
-
testing my own vectors
|
3 |
-
|
4 |
-
list comprehension whenever possible
|
5 |
-
main function
|
6 |
-
if name == main
|
7 |
-
reusable functions that do just one specific task
|
8 |
-
type checking
|
9 |
-
def my_function(in_one: str, in_two: int) -> None:
|
10 |
-
pip install mypy for static typechecking.
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
import
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
if
|
86 |
-
sst.
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
if
|
96 |
-
sst.
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
st.
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
st.
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
st.
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
if st.button("check"):
|
236 |
-
time.sleep(0.5)
|
237 |
-
if user_pw == ASK_ASH_PASSWORD:
|
238 |
-
sst.login = True
|
239 |
-
if "first_load" not in sst:
|
240 |
-
submit_user_load()
|
241 |
-
sst.first_load = True
|
242 |
-
st.rerun()
|
243 |
-
|
244 |
-
|
245 |
-
if __name__ == '__main__':
|
246 |
-
if True:
|
247 |
-
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
|
248 |
-
OPENAI_ORG_ID = os.environ["OPENAI_ORG_ID"]
|
249 |
-
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]
|
250 |
-
sst = st.session_state
|
251 |
-
ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]
|
252 |
-
main()
|
|
|
1 |
+
"""
|
2 |
+
testing my own vectors
|
3 |
+
|
4 |
+
list comprehension whenever possible
|
5 |
+
main function
|
6 |
+
if name == main
|
7 |
+
reusable functions that do just one specific task
|
8 |
+
type checking
|
9 |
+
def my_function(in_one: str, in_two: int) -> None:
|
10 |
+
pip install mypy for static typechecking.
|
11 |
+
|
12 |
+
O Gebäudebetrieb
|
13 |
+
Reinigung
|
14 |
+
|
15 |
+
|
16 |
+
FM Prozesse nicht für klassifizierung
|
17 |
+
Phase auch nicht. IMMER 53!!
|
18 |
+
|
19 |
+
VISION: AUTOMATISCHE BENENNUNG BEI ECODOMUS UPLOAD
|
20 |
+
Automatische Metadatenzuordnung
|
21 |
+
|
22 |
+
|
23 |
+
|
24 |
+
|
25 |
+
"""
|
26 |
+
|
27 |
+
import ingest
|
28 |
+
import my_2_sim_search
|
29 |
+
import my_vectors
|
30 |
+
import setup_db
|
31 |
+
import my_new_openai
|
32 |
+
import time
|
33 |
+
import streamlit as st
|
34 |
+
import os
|
35 |
+
from PIL import Image
|
36 |
+
|
37 |
+
|
38 |
+
def merge_indices(index1, index2):
    """Placeholder for merging two indices into a new one.

    Intended contract (not implemented yet): both indices are of the same
    type and dimensionality. The function currently ignores its arguments
    and returns ``None``.
    """
    return None
|
43 |
+
|
44 |
+
|
45 |
+
def handle_userinput(user_question):
    """Placeholder hook for answering a submitted user question.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    return None
|
47 |
+
|
48 |
+
|
49 |
+
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into the local ``static`` directory.

    Args:
        uploaded_file: Object exposing a ``name`` string and a
            ``getbuffer()`` method returning bytes-like data
            (presumably a Streamlit ``UploadedFile`` -- confirm at call site).

    Returns:
        ``True`` if the file was written, ``False`` on any failure
        (the error is printed, never raised).
    """
    try:
        # The name is supplied by the client: keep only its final path
        # component so that "../x" or "C:\\dir\\x" cannot escape ``static``.
        safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            print(f"refusing to save upload with unusable name: {uploaded_file.name!r}")
            return False

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs('static', exist_ok=True)

        with open(os.path.join('static', safe_name), "wb") as f:
            f.write(uploaded_file.getbuffer())
        return True
    except Exception as e:
        # Deliberate best-effort boundary: callers only look at the boolean.
        print(e)
        return False
|
62 |
+
|
63 |
+
|
64 |
+
def main():
    """Render the Streamlit page: README, login gate, RAG chat and classifier.

    Relies on two module-level names assigned in the ``__main__`` guard of
    this file before ``main()`` is called: ``sst`` (``st.session_state``)
    and ``ASK_ASH_PASSWORD`` (the expected login password).

    Flow:
        1. Seed missing session-state keys with defaults.
        2. Show the optional README.
        3. If not logged in, show the password prompt.
        4. If logged in, the "RAG / classifier" toggle selects either the
           question/answer view with the sidebar document tools, or the
           single-file classifier.
    """
    import hmac  # local import: only needed for the constant-time password check

    st.set_page_config(page_title="Anna Seiler Haus KI-Assistent", page_icon=":hospital:")

    # Seed session state once; existing values survive Streamlit reruns.
    defaults = {
        "conversation": None,
        "chat_history": None,
        "page": "home",
        "openai": True,
        "login": False,
        'submitted_user_query': '',
        'submitted_user_safe': '',
        'submitted_user_load': '',
        'widget_user_load': 'U3_alle',  # folder loaded automatically after first login
        'vectorstore': None,
    }
    for key, value in defaults.items():
        if key not in sst:
            sst[key] = value

    def submit_user_query():
        """Move the typed question into session state and clear the input."""
        sst.submitted_user_query = sst.widget_user_query
        sst.widget_user_query = ''

    def submit_user_safe():
        """Save the current vectorstore to the path typed into the sidebar."""
        sst.submitted_user_safe = sst.widget_user_safe
        sst.widget_user_safe = ''
        if sst.vectorstore is not None:
            my_vectors.save_local(sst.vectorstore, path=sst.submitted_user_safe)
            st.sidebar.success("saved")
        else:
            st.sidebar.warning("No embeddings to save. Please process documents first.")

    def submit_user_load():
        """Load a vectorstore from the folder named in ``widget_user_load``."""
        sst.submitted_user_load = sst.widget_user_load
        sst.widget_user_load = ''
        if not os.path.exists(sst.submitted_user_load):
            st.sidebar.warning("Couldn't load/find embeddings")
            return
        new_db = my_vectors.load_local(f"{sst.submitted_user_load}/faiss_index.index")
        if new_db is None:
            # Previously this case produced no feedback at all.
            st.sidebar.warning("Couldn't load/find embeddings")
            return
        if sst.vectorstore is None:
            sst.vectorstore = new_db
        # NOTE(review): when a vectorstore already exists the loaded one is
        # discarded (merging is not implemented), yet success is reported.
        st.sidebar.success("Vectors loaded")

    st.header("Anna Seiler Haus KI-Assistent ASH :hospital:")
    if st.toggle("show README"):
        st.subheader("Funktion: ")
        st.write("dieses proof-of-concept von Elia Wäfler demonstriert das Potential von RAG (Retrival Augmented Generation) für BIM2FM Dokumentenablagen am Beispiel Dokumente U3 ASH (Anna Seiler Haus, Inselspital Bern). chatte mit den Dokumenten, oder lade selber ein oder mehrere PDF-Dokumente hoch, um RAG auszuprobieren. die vektoren werden lokal oder im st.session_state gespeichert. Feedback und Bugs gerne an elia.waefler@insel.ch")
        st.write("Vielen Dank.")
        st.write("")

        st.subheader("Licence and credits")
        st.write("THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.")
        st.write("special thanks to OpenAI, STREAMLIT, HUGGINGFACE, LANGCHAIN and alejandro-ao")
        left, right = st.columns(2)
        with left:
            st.subheader("Limitationen: ")
            st.write("bisher nur Text aus PDFs")
            st.write("macht Fehler, kann falsche Informationen geben")
            st.write("prompts werden bisher nicht geprüft")
            st.write("")
        with right:
            st.subheader("geplante Erweiterungen:")
            st.write("Tabellen, Bilder werden auch vektorisiert, um die retrival qualität zu verbessern")
            st.write("on premise anwendung mit mistral 7b oder vergleichbar")
            st.write("Ecodomus API einbinden, um alle Dokumente einzubinden.")
            st.write("")

    if not sst.login:
        # Login gate: nothing else is rendered until the password matches.
        user_pw = st.text_input("ASK_ASH_PASSWORD: ", type="password")
        if st.button("check"):
            time.sleep(0.5)  # small fixed delay on every attempt
            # Constant-time comparison; bytes so non-ASCII passwords work too.
            if hmac.compare_digest(user_pw.encode("utf-8"), ASK_ASH_PASSWORD.encode("utf-8")):
                sst.login = True
                if "first_load" not in sst:
                    # Load the default embeddings once per session.
                    submit_user_load()
                    sst.first_load = True
                st.rerun()
        return

    if st.toggle("RAG / classifier"):
        st.text_input('Ask a question about your documents:', key='widget_user_query', on_change=submit_user_query)
        if sst.submitted_user_query:
            if sst.vectorstore is not None:
                handle_userinput(sst.submitted_user_query)
                # Reset with the same type the key is initialised with.
                sst.submitted_user_query = ''
            else:
                st.warning("no vectorstore loaded.")

        with st.sidebar:
            st.subheader("Your documents")
            pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
            if st.button("Process"):
                with st.spinner("Processing"):
                    vec = ingest.get_text_chunks(ingest.get_pdf_text(pdf_docs))
                    st.warning("only text")
                    sst.vectorstore = vec
                    sst.conversation = vec
                st.success("embedding complete")
            st.text_input('Safe Embeddings to: (copy path of folder)', key='widget_user_safe',
                          on_change=submit_user_safe)
            st.text_input('Load Embeddings from: (copy path of folder)', key='widget_user_load',
                          on_change=submit_user_load)
            if st.toggle("reset vectorstore?"):
                if st.button("Yes, reset"):
                    sst.vectorstore = None
                    st.warning("vectorstore reset complete")
                else:
                    st.warning("unsaved embeddings will be lost.")
    else:
        vec_store = setup_db.load_vectorstore_from_excel("data/KBOB_Klassifizierung.xlsx")
        sst.page = "home"
        uploaded = st.file_uploader("upload file", accept_multiple_files=False)
        if st.button("classify me!"):
            with st.spinner("Classifying..."):
                if uploaded is None:
                    # The button can be pressed before anything is uploaded.
                    st.warning("Please upload a file first.")
                elif uploaded.type == "application/pdf":
                    one, two, three, four, _ = st.columns(5)
                    text = ingest.get_pdf_text(uploaded)
                    with one:
                        st.success("text")

                    # The helper may return a single image or a list of them.
                    images = ingest.get_pdf_images(uploaded.getvalue())
                    if not isinstance(images, list):
                        images = [images]
                    for img in images:
                        text += my_new_openai.img_to_text(img_base64=my_new_openai.image_bytes_to_base64(img))
                    with two:
                        st.success("images")

                    # Same single-or-list handling for extracted tables.
                    tabs = ingest.get_pdf_tables(uploaded.getvalue())
                    if not isinstance(tabs, list):
                        tabs = [tabs]
                    for tab in tabs:
                        text += my_new_openai.table_to_text(table=tab)
                    with three:
                        st.success("tabs")

                    full_search = my_new_openai.vectorize_data(text)
                    # NOTE(review): per-chunk vectors are computed but not used yet.
                    detail_search = [my_new_openai.vectorize_data(chunk) for chunk in ingest.get_text_chunks(text)]
                    with four:
                        st.success("vecs")
                    st.write(len(list(vec_store.keys())))
                    sorted_vec_table = my_2_sim_search.sim_search_fly(vec_table=vec_store, term=full_search)
                    st.success("sim search")
                    st.write(f"len of list of categories {len(list(sorted_vec_table.keys()))}")
                    st.write(f"the most fitting category is {next(iter(sorted_vec_table))}")
                else:
                    # st.error() needs a message; the bare call raised TypeError.
                    st.error("Only PDF files can be classified.")
|
225 |
+
|
226 |
+
|
227 |
+
if __name__ == "__main__":
    # Script entry point. Every variable below is required: a missing one
    # raises KeyError before the page is rendered.
    env = os.environ
    OPENAI_API_KEY = env["OPENAI_API_KEY"]
    OPENAI_ORG_ID = env["OPENAI_ORG_ID"]
    HUGGINGFACEHUB_API_TOKEN = env["HUGGINGFACEHUB_API_TOKEN"]
    # main() reads ``sst`` and ``ASK_ASH_PASSWORD`` as module globals.
    sst = st.session_state
    ASK_ASH_PASSWORD = env["ASK_ASH_PASSWORD"]
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|