Spaces:
Running
Running
AFischer1985
committed on
Commit
•
7151b09
1
Parent(s):
9b0d9aa
Update run.py
Browse files
run.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: October 10th, 2024
|
5 |
-
# Last update: October
|
6 |
##########################################################################################
|
7 |
|
8 |
import os
|
@@ -108,23 +108,26 @@ def split_with_overlap(text,chunk_size=3500, overlap=700):
|
|
108 |
chunks.append(text[i:end])
|
109 |
return chunks
|
110 |
|
111 |
-
|
|
|
112 |
print("def add_doc!")
|
113 |
print(path)
|
114 |
anhang=False
|
115 |
if(str.lower(path).endswith(".pdf") and os.path.exists(path)):
|
116 |
doc=convertPDF(path)
|
117 |
if(len(doc[0])>5):
|
118 |
-
gr.Info("PDF uploaded, start Indexing excerpt (first 5 pages)!")
|
119 |
else:
|
120 |
-
gr.Info("PDF uploaded, start Indexing!")
|
121 |
doc="\n\n".join(doc[0][0:5])
|
122 |
anhang=True
|
123 |
-
|
|
|
124 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
125 |
print(str(client.list_collections()))
|
126 |
#global collection
|
127 |
-
|
|
|
128 |
if(not "name="+dbName in str(client.list_collections())):
|
129 |
# client.delete_collection(name=dbName)
|
130 |
collection = client.create_collection(
|
@@ -157,17 +160,28 @@ def add_doc(path):
|
|
157 |
return(collection)
|
158 |
|
159 |
#split_with_overlap("test me if you can",2,1)
|
|
|
|
|
160 |
|
161 |
import gradio as gr
|
162 |
import re
|
163 |
-
def multimodalResponse(message,history,dropdown):
|
164 |
print("def multimodal response!")
|
|
|
|
|
|
|
|
|
|
|
165 |
length=str(len(history))
|
|
|
|
|
|
|
|
|
166 |
query=message["text"]
|
167 |
if(len(message["files"])>0): # is there at least one file attached?
|
168 |
-
collection=add_doc(message["files"][0])
|
169 |
-
else:
|
170 |
-
collection=add_doc(message["text"])
|
171 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
172 |
print(str(client.list_collections()))
|
173 |
x=collection.get(include=[])["ids"]
|
@@ -214,11 +228,3 @@ i.launch() #allowed_paths=["."])
|
|
214 |
|
215 |
|
216 |
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
|
|
2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: October 10th, 2024
|
5 |
+
# Last update: October 12th, 2024
|
6 |
##########################################################################################
|
7 |
|
8 |
import os
|
|
|
108 |
chunks.append(text[i:end])
|
109 |
return chunks
|
110 |
|
111 |
+
|
112 |
+
def add_doc(path, session):
|
113 |
print("def add_doc!")
|
114 |
print(path)
|
115 |
anhang=False
|
116 |
if(str.lower(path).endswith(".pdf") and os.path.exists(path)):
|
117 |
doc=convertPDF(path)
|
118 |
if(len(doc[0])>5):
|
119 |
+
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (first 5 pages)!")
|
120 |
else:
|
121 |
+
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing!")
|
122 |
doc="\n\n".join(doc[0][0:5])
|
123 |
anhang=True
|
124 |
+
else:
|
125 |
+
gr.Info("No PDF attached - answer based on DB_"+str(session)+".")
|
126 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
127 |
print(str(client.list_collections()))
|
128 |
#global collection
|
129 |
+
print(str(session))
|
130 |
+
dbName="DB_"+str(session)
|
131 |
if(not "name="+dbName in str(client.list_collections())):
|
132 |
# client.delete_collection(name=dbName)
|
133 |
collection = client.create_collection(
|
|
|
160 |
return(collection)
|
161 |
|
162 |
#split_with_overlap("test me if you can",2,1)
|
163 |
+
from datetime import date
|
164 |
+
databases=[(date.today(),"0")] # list of all databases
|
165 |
|
166 |
import gradio as gr
|
167 |
import re
|
168 |
+
def multimodalResponse(message,history,dropdown, request: gr.Request):
|
169 |
print("def multimodal response!")
|
170 |
+
global databases
|
171 |
+
if request:
|
172 |
+
session=request.session_hash
|
173 |
+
else:
|
174 |
+
session="0"
|
175 |
length=str(len(history))
|
176 |
+
print(databases)
|
177 |
+
if(not databases[-1][1]==session):
|
178 |
+
databases.append((date.today(),session))
|
179 |
+
#print(databases)
|
180 |
query=message["text"]
|
181 |
if(len(message["files"])>0): # is there at least one file attached?
|
182 |
+
collection=add_doc(message["files"][0], session)
|
183 |
+
else: # otherwise, you still want to get the collection with the session-based db
|
184 |
+
collection=add_doc(message["text"], session)
|
185 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
186 |
print(str(client.list_collections()))
|
187 |
x=collection.get(include=[])["ids"]
|
|
|
228 |
|
229 |
|
230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|