AFischer1985 committed on
Commit 09eaef4
1 Parent(s): 8d8b439

Update run.py

Files changed (1)
  1. run.py +353 -74
run.py CHANGED
@@ -1,18 +1,34 @@
1
- #############################################################################################################
2
- # Title: Gradio Interface to LLM-chatbot (for recommending AI) with RAG-functionality and ChromaDB on HF-Hub
3
  # Author: Andreas Fischer
4
- # Date: December 30th, 2023
5
- # Last update: January 2nd, 2024
6
- ##############################################################################################################
7
 
8
 
9
  # Chroma-DB
10
  #-----------
11
  import os
12
  import chromadb
13
  dbPath="/home/af/Schreibtisch/gradio/Chroma/db"
14
- if(os.path.exists(dbPath)==False):
15
- dbPath="/home/user/app/db"
16
  print(dbPath)
17
  #client = chromadb.Client()
18
  path=dbPath
@@ -22,23 +38,38 @@ print(client.get_version())
22
  print(client.list_collections())
23
  from chromadb.utils import embedding_functions
24
  default_ef = embedding_functions.DefaultEmbeddingFunction()
25
- sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
26
  #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
27
  print(str(client.list_collections()))
28
 
29
  global collection
30
- if("name=ChromaDB1" in str(client.list_collections())):
31
- print("ChromaDB1 found!")
32
- collection = client.get_collection(name="ChromaDB1", embedding_function=sentence_transformer_ef)
33
  else:
34
- print("ChromaDB1 created!")
35
  collection = client.create_collection(
36
- "ChromaDB1",
37
- embedding_function=sentence_transformer_ef,
38
  metadata={"hnsw:space": "cosine"})
39
-
40
- collection.add(
41
- documents=[
42
  "Text generating AI model mistralai/Mixtral-8x7B-Instruct-v0.1: Suitable for text generation, e.g., social media content, marketing copy, blog posts, short stories, etc.",
43
  "Image generating AI model stabilityai/sdxl-turbo: Suitable for image generation, e.g., illustrations, graphics, AI art, etc.",
44
  "Audio transcribing AI model openai/whisper-large-v3: Suitable for audio-transcription in different languages",
@@ -46,80 +77,328 @@ else:
46
  "Code generating AI model deepseek-ai/deepseek-coder-6.7b-instruct: Suitable for programming in Python, JavaScript, PHP, Bash and many other programming languages.",
47
  "Translation AI model Helsinki-NLP/opus-mt: Suitable for translating text, e.g., from English to German or vice versa",
48
  "Search result-integrating AI model phind/phind-v9-model: Suitable for researching current topics and for obtaining precise and up-to-date answers to questions based on web search results"
49
- ],
50
- metadatas=[{"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}],
51
- ids=["ai1", "ai2", "ai3", "ai4", "ai5", "ai6", "ai7"],
52
  )
53
 
54
  print("Database ready!")
55
  print(collection.count())
56
 
57
 
58
- # Model
59
- #-------
60
 
61
- from huggingface_hub import InferenceClient
62
- import gradio as gr
63
 
64
- client = InferenceClient(
65
- "mistralai/Mixtral-8x7B-Instruct-v0.1"
66
- #"mistralai/Mistral-7B-Instruct-v0.1"
67
- )
68
 
69
 
70
  # Gradio-GUI
71
  #------------
72
 
73
  import gradio as gr
 
74
  import json
75
 
76
- def format_prompt(message, history):
77
- prompt = "<s>"
78
- #for user_prompt, bot_response in history:
79
- # prompt += f"[INST] {user_prompt} [/INST]"
80
- # prompt += f" {bot_response}</s> "
81
- prompt += f"[INST] {message} [/INST]"
82
- return prompt
83
-
84
- def response(
85
- prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
86
- ):
87
- temperature = float(temperature)
88
- if temperature < 1e-2: temperature = 1e-2
89
- top_p = float(top_p)
90
- generate_kwargs = dict(
91
- temperature=temperature,
92
- max_new_tokens=max_new_tokens,
93
- top_p=top_p,
94
- repetition_penalty=repetition_penalty,
95
- do_sample=True,
96
- seed=42,
97
- )
98
- addon=""
99
- results=collection.query(
100
- query_texts=[prompt],
101
  n_results=2,
102
- #where={"source": "google-docs"}
103
  #where_document={"$contains":"search_string"}
104
  )
105
- dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in results['distances'][0]]
106
- sources=["source: "+s["source"]+")</small>" for s in results['metadatas'][0]]
107
- results=results['documents'][0]
108
- combination = zip(results,dists,sources)
109
  combination = [' '.join(triplets) for triplets in combination]
110
- print(combination)
111
- if(len(results)>1):
112
- addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
113
- system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."+addon+"\n\nUser-Anliegen:"
114
- #body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
115
- formatted_prompt = format_prompt(system+"\n"+prompt, history)
116
- stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
117
- output = ""
118
- for response in stream:
119
- output += response.token.text
120
- yield output
121
- output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
122
- yield output
123
-
124
- gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.<br>Aktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<br>Was ist dein Anliegen?"]],render_markdown=True),title="German AI-RAG-Interface to the Hugging Face Hub").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
125
- print("Interface up and running!")
1
+ #########################################################################################
2
+ # Title: German AI-Interface to the Hugging Face Hub with advanced RAG
3
  # Author: Andreas Fischer
4
+ # Date: January 31st, 2024
5
+ # Last update: February 21st, 2024
6
+ ##########################################################################################
7
 
8
+ #https://github.com/abetlen/llama-cpp-python/issues/306
9
+ #sudo apt install libclblast-dev
10
+ #CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir -v
11
+
12
+ # Prepare resources
13
+ #-------------------
14
+ import torch
15
+ import gc
16
+ torch.cuda.empty_cache()
17
+ gc.collect()
18
+
19
+ import os
20
+ from datetime import datetime
21
+ global filename
22
+ filename=f"./{datetime.now().strftime('%Y%m%d')}_history.json" # where to store the history as json-file
23
+ if(os.path.exists(filename)==True): os.remove(filename)
24
 
25
  # Chroma-DB
26
  #-----------
27
  import os
28
  import chromadb
29
  dbPath="/home/af/Schreibtisch/gradio/Chroma/db"
30
+ if(os.path.exists(dbPath)==False): dbPath="/home/user/app/db"
31
+
32
  print(dbPath)
33
  #client = chromadb.Client()
34
  path=dbPath
 
38
  print(client.list_collections())
39
  from chromadb.utils import embedding_functions
40
  default_ef = embedding_functions.DefaultEmbeddingFunction()
41
+ #sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
42
  #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
43
+ embeddingModel = embedding_functions.InstructorEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer", device="cuda")
44
+
45
  print(str(client.list_collections()))
46
 
47
  global collection
48
+ dbName="myDB"
49
+ if("name="+dbName in str(client.list_collections())): client.delete_collection(name=dbName)
50
+
51
+ if("name="+dbName in str(client.list_collections())):
52
+ print(dbName+" found!")
53
+ collection = client.get_collection(name=dbName, embedding_function=embeddingModel )
54
  else:
55
+ print(dbName+" created!")
56
  collection = client.create_collection(
57
+ dbName,
58
+ embedding_function=embeddingModel,
59
  metadata={"hnsw:space": "cosine"})
60
+ # txts0: Intentions
61
+ #------------------
62
+ txts0=[
63
+ "Ich suche ein KI-Programm mit bestimmten Fähigkeiten.", # 1a
64
+ #"Ich suche kein KI-Programm mit bestimmten Fähigkeiten.", # !1a
65
+ "Ich habe ein KI-Programm und habe Fragen zur Benutzung.", # !1a (besser, um 1a und 1b abzugrenzen)
66
+ "Ich habe ein KI-Programm und habe Fragen zur Benutzung.", # 1b
67
+ #"Ich habe kein KI-Programm und habe keine Fragen zur Benutzung.", # !1b
68
+ "Ich habe eine allgemeine Frage ohne KI-Bezug." # !1b (greift besser bei Alltagsfragen)
69
+ ]
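+ # The current user message is later matched against these anchor sentences (metadata type "0") to infer its intention (1a, 1b, or else).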
70
+ # txts1a: RAG-Infos for first intention:
71
+ #---------------------------------------
72
+ txts1a=[
73
  "Text generating AI model mistralai/Mixtral-8x7B-Instruct-v0.1: Suitable for text generation, e.g., social media content, marketing copy, blog posts, short stories, etc.",
74
  "Image generating AI model stabilityai/sdxl-turbo: Suitable for image generation, e.g., illustrations, graphics, AI art, etc.",
75
  "Audio transcribing AI model openai/whisper-large-v3: Suitable for audio-transcription in different languages",
 
77
  "Code generating AI model deepseek-ai/deepseek-coder-6.7b-instruct: Suitable for programming in Python, JavaScript, PHP, Bash and many other programming languages.",
78
  "Translation AI model Helsinki-NLP/opus-mt: Suitable for translating text, e.g., from English to German or vice versa",
79
  "Search result-integrating AI model phind/phind-v9-model: Suitable for researching current topics and for obtaining precise and up-to-date answers to questions based on web search results"
80
+ ]
81
+ # txts1b: RAG-Infos for second intention
82
+ #----------------------------------------
83
+ txts1b=[
84
+ "Für Fragen zur Umsetzung von KI-Verfahren ist das KI-basierte Assistenzsystem nicht geeignet. Möglicherweise empfiehlt sich ein KI-Modell mit Internetzugriff, wie beispielsweise phind.com, oder das Kontaktieren eines Experten wie Dr. Andreas Fischer (andreasfischer1985@web.de)."
85
+ ]
86
+ #meta=[{"type":"0", "type2":"0","source":"AF"}]*len(txts0)+[{"type":"1a","type2":"0","source":"AF"}]*len(txts1a)+[{"type":"1b","type2":"0","source":"AF"}]*len(txts1b)
87
+ meta = []
88
+ for _ in range(len(txts0)):
89
+ meta.append({"type":"0", "type2":"0","source":"AF"})
90
+ for _ in range(len(txts1a)):
91
+ meta.append({"type":"1a","type2":"0","source":"AF"})
92
+ for _ in range(len(txts1b)):
93
+ meta.append({"type":"1b","type2":"0","source":"AF"})
94
+
95
+ #Change type2 for txt0-entries
96
+ #-----------------------------
97
+ meta[0]["type2"]="1a" # RAG mit txts1a
98
+ meta[1]["type2"]="!1a" # else
99
+ meta[2]["type2"]="1b" # RAG mit txts1b
100
+ meta[3]["type2"]="!1b" # else
101
+ txts=txts0+txts1a+txts1b
102
+ collection.add(
103
+ documents=txts,
104
+ ids=[str(i) for i in list(range(len(txts)))],
105
+ metadatas=meta
106
  )
107
+
108
+ # Add entry to episodic memory
109
+ x=collection.get(include=[])["ids"]
110
+ if(True): #len(x)==0):
111
+ message="Ich bin der User."
112
+ response="Hallo User, wie kann ich dienen?"
113
+ x=collection.get(include=[])["ids"]
114
+ collection.add(
115
+ documents=[message,response],
116
+ metadatas=[
117
+ {"source": "ICH", "dialog": f"ICH: {message}\nDU: {response}", "type":"episode"},
118
+ {"source": "DU", "dialog": f"ICH: {message}\nDU: {response}", "type":"episode"}
119
+ ],
120
+ ids=[str(len(x)+1),str(len(x)+2)]
121
+ )
122
+ RAGResults=collection.query(
123
+ query_texts=[message],
124
+ n_results=1,
125
+ #where={"source": "USER"}
126
+ )
127
+ RAGResults["metadatas"][0][0]["dialog"]
128
+
129
+ x=collection.get(include=[])["ids"]
130
+ x
131
+ collection.get() # Inspect db-entries
132
 
133
  print("Database ready!")
134
  print(collection.count())
135
 
136
+ rag0=collection.query(
137
+ query_texts=[message],
138
+ n_results=4,
139
+ where={"type": "0"}
140
+ )
141
+ x=rag0["metadatas"][0][0]["type2"]
142
+ x=[x["type2"] for x in rag0["metadatas"][0]]
143
+ x.index("1c") if "1c" in x else len(x)+1
144
+
145
 
146
+ # Get model
147
+ #-----------
148
 
149
+ import os
150
+ import requests
151
+
152
+ modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
153
+ if(os.path.exists(modelPath)==False):
154
+ #url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
155
+ #url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
156
+ #url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
157
+ url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
158
+ response = requests.get(url)
159
+ with open("./model.gguf", mode="wb") as file:
160
+ file.write(response.content)
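+ # note: requests.get without stream=True keeps the whole GGUF file in memory before it is written to ./model.gguf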
161
+ print("Model downloaded")
162
+ modelPath="./model.gguf"
163
+
164
+ print(modelPath)
165
+
166
+
167
+ # Llama-cpp-Server
168
+ #------------------
169
 
170
+ import subprocess
171
+ n="20"
172
+ if("mixtral-8x7b-instruct" in modelPath): n="0" # mixtral seems to cause problems here...
173
+
174
+ command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600", "--n_threads", "8", "--n_gpu_layers", n]
175
+ subprocess.Popen(command)
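+ # starts llama-cpp-python's OpenAI-compatible server in the background; the Gradio callback below posts to its /v1/completions endpoint on port 2600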
176
+ print("Server ready!")
177
 
178
 
179
  # Gradio-GUI
180
  #------------
181
 
182
+ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4): #float("Inf")
183
+ if zeichenlimit is None: zeichenlimit=1000000000 # :-)
184
+ template0="[INST] {system} [/INST]</s>" #<s>
185
+ template1="[INST] {message} [/INST] "
186
+ template2="{response}</s>"
187
+ if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
188
+ template0="[INST] {system} [/INST]</s>" #<s>
189
+ template1="[INST] {message} [/INST] "
190
+ template2="{response}</s>"
191
+ if("Mistral-7B-Instruct" in modelPath): #https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
192
+ template0="[INST] {system} [/INST]</s>" #<s>
193
+ template1="[INST] {message} [/INST] "
194
+ template2="{response}</s>"
195
+ if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
196
+ template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
197
+ template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
198
+ template2="{response}<|end_of_turn|>"
199
+ if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
200
+ template0="<|im_start|>system\n{system}<|im_end|>\n"
201
+ template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
202
+ template2="{response}<|im_end|>\n"
203
+ if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
204
+ template0="<|im_start|>system\n{system}<|im_end|>\n"
205
+ template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
206
+ template2="{response}<|im_end|>\n"
207
+ if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
208
+ template0="{system} " #<s>
209
+ template1="USER: {message} ASSISTANT: "
210
+ template2="{response}</s>"
211
+ if("phi-2" in modelPath): #https://huggingface.co/TheBloke/phi-2-GGUF
212
+ template0="Instruct: {system}\nOutput: Okay.\n"
213
+ template1="Instruct: {message}\nOutput:"
214
+ template2="{response}\n"
215
+ prompt = ""
216
+ if RAGAddon is not None:
217
+ system += RAGAddon
218
+ if system is not None:
219
+ prompt += template0.format(system=system) #"<s>"
220
+ if history is not None:
221
+ for user_message, bot_response in history[-historylimit:]:
222
+ if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit]) #"[INST] {user_prompt} [/INST] "
223
+ if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit]) #"{bot_response}</s> "
224
+ if message is not None: prompt += template1.format(message=message[:zeichenlimit]) #"[INST] {message} [/INST]"
225
+ if system2 is not None:
226
+ prompt += system2
227
+ return prompt
228
+
229
  import gradio as gr
230
+ import requests
231
  import json
232
+ from datetime import datetime
233
+ import os
234
+ import re
235
 
236
+ def response(message, history):
237
+ settings="Temporär"
238
+
239
+ # Preprocessing to prevent simple forms of prompt injection:
240
+ #----------------------------------------------------------
241
+
242
+ message=message.replace("[INST]","")
243
+ message=message.replace("[/INST]","")
244
+ message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
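+ # removes Mistral [INST] markers and ChatML/openchat turn delimiters so user input cannot break out of the prompt templates above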
245
+
246
+ # Load Memory if settings=="Permanent"
247
+ #-------------------------------------
248
+ if (settings=="Permanent"):
249
+ if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
250
+
251
+ system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem."
252
+
253
+ #RAG-layer 0: Intention-RAG
254
+ #---------------------------
255
+ typeResults=collection.query(
256
+ query_texts=[message],
257
+ n_results=4,
258
+ where={"type": "0"}
259
+ )
260
+ myType=typeResults["metadatas"][0][0]["type2"] # simplest variant
261
+ x=[x["type2"] for x in typeResults["metadatas"][0]] # list the type2 entries
262
+ myType="1a" if ((x.index("1a") if "1a" in x else len(x)+1) < (x.index("!1a") if "!1a" in x else len(x)+1)) else "else" # set 1a if it matches better than !1a
263
+ if ((x.index("1b") if "1b" in x else len(x)+1) < (x.index("1a") if "1a" in x else len(x)+1)): # check 1b if 1b matches better than 1a
264
+ if ((x.index("1b") if "1b" in x else len(x)+1) < (x.index("!1b") if "!1b" in x else len(x)+1)): myType="1b" # set 1b if it beats !1b (otherwise keep 1a/else)
265
+
266
+ print("Message:"+message+"\n\nIntention-Type: "+myType+"\n\n"+str(typeResults))
267
+
268
+ #RAG-layer 1: Respond with CustomDB-RAG (1a, 1b) or Memory-RAG
269
+ #--------------------------------------------------------------
270
+ rag=None
271
+ historylimit=4
272
+ combination=None
273
+
274
+ ## RAG 1a: Respond with CustomDB-RAG
275
+ #-----------------------------------
276
+ if(myType=="1a"):
277
+
278
+ RAGResults=collection.query(
279
+ query_texts=[message],
280
  n_results=2,
281
+ where={"type": myType}
282
  #where_document={"$contains":"search_string"}
283
  )
284
+ dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in RAGResults['distances'][0]]
285
+ sources=["source: "+s["source"]+")</small>" for s in RAGResults['metadatas'][0]]
286
+ texts=RAGResults['documents'][0]
287
+ combination = zip(texts,dists,sources)
288
  combination = [' '.join(triplets) for triplets in combination]
289
+ #print(combination)
290
+ rag="\n\n"
291
+ rag += "Mit Blick auf die aktuelle Äußerung des Users erinnerst du dich insb. an folgende KI-Verfahren aus unserer Datenbank:\n"
292
+ rag += str(texts)
293
+ rag += "\n\nIm Folgenden siehst du den jüngsten Dialog-Verlauf:"
294
+
295
+ else:
296
+
297
+ ## RAG 1b: Respond with CustomDB-RAG
298
+ #-----------------------------------
299
+ if(myType=="1b"):
300
+
301
+ RAGResults=collection.query(
302
+ query_texts=[message],
303
+ n_results=2,
304
+ where={"type": myType}
305
+ #where_document={"$contains":"search_string"}
306
+ )
307
+ dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in RAGResults['distances'][0]]
308
+ sources=["source: "+s["source"]+")</small>" for s in RAGResults['metadatas'][0]]
309
+ texts=RAGResults['documents'][0]
310
+ combination = zip(texts,dists,sources)
311
+ combination = [' '.join(triplets) for triplets in combination]
312
+ #print(combination)
313
+ rag="\n\n"
314
+ rag += "Beziehe dich in deiner Antwort AUSSCHLIEßLICH auf die folgenden Informationen:\n"
315
+ rag += str(texts)
316
+ rag += "\n\nIm Folgenden siehst du den jüngsten Dialog-Verlauf:"
317
+
318
+ ## Else: Respond with Memory-RAG
319
+ #--------------------------------
320
+ else:
321
+
322
+ x=collection.get(include=[])["ids"]
323
+ if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
324
+ RAGResults=collection.query(
325
+ query_texts=[message],
326
+ n_results=1,
327
+ where={"type": "episode"}
328
+ )
329
+ texts=RAGResults["metadatas"][0][0]["dialog"] #str()
330
+ #print("Message: "+message+"\n\nBest Match: "+texts)
331
+ rag="\n\n"
332
+ rag += "Mit Blick auf die aktuelle Äußerung des Users erinnerst du dich insb. an folgende Episode aus eurem Dialog:\n"
333
+ rag += str(texts)
334
+ rag += "\n\nIm Folgenden siehst du den jüngsten Dialog-Verlauf:"
335
+
336
+ # Request Response from LLM:
337
+ system2=None # system2 can be used as fictive first words of the AI, which are not displayed or stored
338
+ print("RAG: "+rag)
339
+ print("System: "+system+"\n\nMessage: "+message)
340
+ prompt=extend_prompt(
341
+ message, # current message of the user
342
+ history, # complete history
343
+ system, # system prompt
344
+ rag, # RAG-component added to the system prompt
345
+ system2, # fictive first words of the AI (neither displayed nor stored)
346
+ historylimit=historylimit # number of past messages to consider for response to current message
347
+ )
348
+ print(prompt)
349
+ # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
350
+ url="http://0.0.0.0:2600/v1/completions"
351
+ body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
352
+ if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
353
+ response="" #+"("+myType+")\n"
354
+ buffer=""
355
+ print("URL: "+url)
356
+ print("User: "+message+"\nAI: ")
357
+ for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
358
+ if buffer is None: buffer=""
359
+ buffer=str("".join(buffer))
360
+ # print("*** Raw String: "+str(text)+"\n***\n")
361
+ text=text.decode('utf-8')
362
+ if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
363
+ # print("\n*** Buffer: "+str(buffer)+"\n***\n")
364
+ buffer=buffer.split('"finish_reason": null}]}')
365
+ if(len(buffer)==1):
366
+ buffer="".join(buffer)
367
+ pass
368
+ if(len(buffer)==2):
369
+ part=buffer[0]+'"finish_reason": null}]}'
370
+ if(part.lstrip('\n\r').startswith("data: ")): part=part.lstrip('\n\r').replace("data: ", "")
371
+ try:
372
+ part = str(json.loads(part)["choices"][0]["text"])
373
+ print(part, end="", flush=True)
374
+ response=response+part
375
+ buffer="" # reset buffer
376
+ except Exception as e:
377
+ print("Exception:"+str(e))
378
+ pass
379
+ yield response
380
+ if((myType=="1a")|(myType=="1b")): #add RAG-results to chat-output if appropriate
381
+ response=response+"\n\n<br><details><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
382
+ yield response
383
+ history.append((message, response)) # add current dialog to history
384
+ # Store current state in DB if settings=="Permanent"
385
+ if (settings=="Permanent"):
386
+ x=collection.get(include=[])["ids"] # add current dialog to db
387
+ collection.add(
388
+ documents=[message,response],
389
+ metadatas=[
390
+ { "source": "ICH", "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"},
391
+ { "source": "DU", "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"}
392
+ ],
393
+ ids=[str(len(x)+1),str(len(x)+2)]
394
+ )
395
+ json.dump(history,open(filename,'w',encoding="utf-8"),ensure_ascii=False)
396
+
397
+ gr.ChatInterface(
398
+ response,
399
+ chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.<br>Aktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<br>Was ist dein Anliegen?"]],render_markdown=True),
400
+ title="German AI-Interface to the Hugging Face Hub with advanced RAG",
401
+ #additional_inputs=[gr.Dropdown(["Permanent","Temporär"],value="Temporär",label="Dialog sichern?")]
402
+ ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
403
+ print("Interface up and running!")
404
+