AFischer1985 committed on
Commit 43fdc5b
1 Parent(s): 7151b09

Update run.py

Files changed (1)
  1. run.py +50 -19
run.py CHANGED
@@ -38,23 +38,47 @@ print(client.list_collections())
jina_ef=JinaEmbeddingFunction()
embeddingModel=jina_ef


- from huggingface_hub import InferenceClient
- import gradio as gr
- import json
- inferenceClient = InferenceClient(
-     "mistralai/Mixtral-8x7B-Instruct-v0.1"
-     #"mistralai/Mistral-7B-Instruct-v0.1"
- )
- def format_prompt(message, history):
    prompt = "<s>"
    #for user_prompt, bot_response in history:
    #  prompt += f"[INST] {user_prompt} [/INST]"
-   #  prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt



from pypdf import PdfReader
import ocrmypdf
@@ -159,14 +183,20 @@ def add_doc(path, session):
    print(now-then) # too many GB for sentences (GPU), or 0:00:10.375087 for chunks
    return(collection)

#split_with_overlap("test me if you can",2,1)
from datetime import date
databases=[(date.today(),"0")] # list of all databases

import gradio as gr
import re
- def multimodalResponse(message,history,dropdown, request: gr.Request):
    print("def multimodal response!")
    global databases
    if request:
        session=request.session_hash
@@ -186,10 +216,7 @@ def multimodalResponse(message,history,dropdown, request: gr.Request):
    print(str(client.list_collections()))
    x=collection.get(include=[])["ids"]
    context=collection.query(query_texts=[query], n_results=1)
-   print(str(context))
-   #context=["<context "+str(i+1)+">\n"+c+"\n</context "+str(i+1)+">" for i, c in enumerate(retrievedTexts)]
-   #context="\n\n".join(context)
-   #return context
    generate_kwargs = dict(
        temperature=float(0.9),
        max_new_tokens=5000,
@@ -206,13 +233,15 @@ def multimodalResponse(message,history,dropdown, request: gr.Request):
    #"Return only your response to the question given the above information "+\
    #"following the users instructions as needed.\n\nContext:"+\
    print(system)
-   formatted_prompt = format_prompt(system+"\n"+query, history)
    stream = inferenceClient.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
-   #output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
    yield output

i=gr.ChatInterface(multimodalResponse,
@@ -223,8 +252,10 @@ i=gr.ChatInterface(multimodalResponse,
        info="select retrieval version",
        choices=["1","2","3"],
        value=["1"],
-       label="Retrieval Version")])
i.launch() #allowed_paths=["."])

-
-
 
jina_ef=JinaEmbeddingFunction()
embeddingModel=jina_ef

+ #mod="mistralai/Mixtral-8x7b-instruct-v0.1"
+ #tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
+ #cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
+ cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
+ #res=tok.apply_chat_template(cha)
+ #print(tok.decode(res))

+
+ def format_prompt0(message, history):
    prompt = "<s>"
    #for user_prompt, bot_response in history:
    #  prompt += f"[INST] {user_prompt} [/INST]"
+   #  prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt


+ def format_prompt(message, history, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=False):
+     if zeichenlimit is None: zeichenlimit=1000000000 # :-)
+     startOfString="<s>" #<s> [INST] U1 [/INST] A1</s> [INST] U2 [/INST] A2</s>
+     template0=" [INST]{system}\n[/INST]</s>"
+     template1=" [INST] {message} [/INST]"
+     template2=" {response}</s>"
+     prompt = ""
+     if RAGAddon is not None:
+         system += RAGAddon
+     if system is not None:
+         prompt += template0.format(system=system) #"<s>"
+     if history is not None:
+         for user_message, bot_response in history[-historylimit:]:
+             if user_message is None: user_message = ""
+             if bot_response is None: bot_response = ""
+             #bot_response = re.sub("\n\n<details>((.|\n)*?)</details>","", bot_response) # remove RAG components
+             if removeHTML==True: bot_response = re.sub("<(.*?)>","\n", bot_response) # remove HTML components in general (may cause bugs with markdown rendering)
+             if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])
+             if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit])
+     if message is not None: prompt += template1.format(message=message[:zeichenlimit])
+     if system2 is not None:
+         prompt += system2
+     return startOfString+prompt
+

from pypdf import PdfReader
import ocrmypdf
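
Note on the new prompt builder above: the commented-out AutoTokenizer/apply_chat_template lines sketch how the same chat could be templated via transformers, while the new format_prompt assembles the Mixtral-Instruct string by hand (optional system turn, up to historylimit previous turns, then the current message). Below is a minimal sketch, not part of the commit, of what it is expected to return for the sample turns used in this hunk; the variable names history and p are illustrative only.

history = [("U1", "A1")]                       # one earlier user/assistant turn, Gradio-style tuples
p = format_prompt("U2", history, system="S")   # current user turn "U2", system prompt "S"
print(p)
# expected output (one string, line break comes from the "\n" in template0):
# <s> [INST]S
# [/INST]</s> [INST] U1 [/INST] A1</s> [INST] U2 [/INST]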
 
    print(now-then) # too many GB for sentences (GPU), or 0:00:10.375087 for chunks
    return(collection)

+
#split_with_overlap("test me if you can",2,1)
from datetime import date
databases=[(date.today(),"0")] # list of all databases

+ from huggingface_hub import InferenceClient
import gradio as gr
import re
+ def multimodalResponse(message, history, dropdown, hfToken, request: gr.Request):
    print("def multimodal response!")
+   if(hfToken.startswith("hf_")): # use HF-hub with custom token if token is provided
+       inferenceClient = InferenceClient(model=myModel, token=hfToken)
+   else:
+       inferenceClient = InferenceClient(myModel)
    global databases
    if request:
        session=request.session_hash
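
The hunk above now builds the InferenceClient inside the handler, so each request can use the Hugging Face token typed into the UI; myModel is referenced here but defined elsewhere in run.py (presumably the Mixtral model id used before this change). A minimal sketch of the same pattern, with the model id as an explicit parameter instead of a global; make_client and the default model_id are illustrative assumptions, not part of the commit.

from huggingface_hub import InferenceClient

def make_client(hfToken, model_id="mistralai/Mixtral-8x7B-Instruct-v0.1"):
    # use the user-supplied token only if it looks like a Hugging Face token ("hf_...")
    if hfToken.startswith("hf_"):
        return InferenceClient(model=model_id, token=hfToken)
    return InferenceClient(model_id)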
 
    print(str(client.list_collections()))
    x=collection.get(include=[])["ids"]
    context=collection.query(query_texts=[query], n_results=1)
+   gr.Info("Kontext:\n"+str(context))
    generate_kwargs = dict(
        temperature=float(0.9),
        max_new_tokens=5000,
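
For reference, collection.query returns lists of lists (one inner list per query text), which is what the gr.Info toast added above prints verbatim. A minimal sketch of pulling out the single retrieved chunk, assuming collection is the chromadb collection built by add_doc; the query string and top_chunk name are placeholders.

context = collection.query(query_texts=["example question"], n_results=1)
top_chunk = context["documents"][0][0]   # best-matching chunk for the first (and only) query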
 
    #"Return only your response to the question given the above information "+\
    #"following the users instructions as needed.\n\nContext:"+\
    print(system)
+   #formatted_prompt = format_prompt0(system+"\n"+query, history)
+   formatted_prompt = format_prompt(query, history, system=system)
+   print(formatted_prompt)
    stream = inferenceClient.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
+   #output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br>"+str(context)+"</details>"
    yield output

i=gr.ChatInterface(multimodalResponse,
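
The streaming loop above relies on text_generation(stream=True, details=True) yielding one token per iteration; re-yielding the growing string lets gr.ChatInterface redraw the partial answer. A minimal standalone sketch of that pattern, assuming an InferenceClient named client and a ready prompt string; stream_answer is an illustrative helper name, not part of the commit.

def stream_answer(client, prompt):
    output = ""
    for chunk in client.text_generation(prompt, max_new_tokens=512, stream=True,
                                        details=True, return_full_text=False):
        output += chunk.token.text   # each chunk carries one generated token
        yield output                 # yield the partial answer so the UI updates incrementally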
 
        info="select retrieval version",
        choices=["1","2","3"],
        value=["1"],
+       label="Retrieval Version"),
+   gr.Textbox(
+       value="",
+       label="HF_token"),
+   ])
i.launch() #allowed_paths=["."])
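
With two additional_inputs on the ChatInterface, Gradio passes their values to multimodalResponse as the extra positional arguments after message and history, i.e. dropdown and hfToken. A minimal sketch of that wiring under the same assumption; demo is an illustrative name, and the dropdown value is given as a plain string here rather than the list used in the diff.

import gradio as gr

demo = gr.ChatInterface(
    multimodalResponse,
    additional_inputs=[
        gr.Dropdown(info="select retrieval version", choices=["1", "2", "3"],
                    value="1", label="Retrieval Version"),
        gr.Textbox(value="", label="HF_token"),
    ],
)
demo.launch()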