AFischer1985 committed · Commit a97d3f8 · 1 parent: 52823c9

Update run.py

run.py CHANGED
@@ -2,11 +2,10 @@
 # Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
 # Author: Andreas Fischer
 # Date: October 10th, 2024
-# Last update: October
+# Last update: October 25th, 2024
 ##########################################################################################
 
 import os
-
 import torch
 from transformers import AutoTokenizer, AutoModel # chromaDB
 from datetime import datetime, date #add_doc,
@@ -26,7 +25,9 @@ from huggingface_hub import InferenceClient #multimodal_response
 
 myModel="mistralai/Mixtral-8x7b-instruct-v0.1"
 #myModel="princeton-nlp/gemma-2-9b-it-SimPO"
-#
+#myModel="google/gemma-2-2b-it"
+#myModel="meta-llama/Llama-3.1-8B-Instruct"
+#mod=myModel
 #tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
 #cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
 #cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
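Note: the three added comment lines extend the pool of candidate models and alias mod=myModel so the commented-out tokenizer experiment below them runs as-is. That experiment is presumably how the hard-coded prompt templates in the next hunk were derived: render a dummy conversation through a model's chat template and read off the special tokens. A minimal sketch of that workflow, assuming only the transformers library (a HF token is needed for gated models):

# Sketch (not part of the commit): derive a model's prompt format
# from its tokenizer's chat template.
from transformers import AutoTokenizer

mod = "mistralai/Mixtral-8x7B-Instruct-v0.1"
tok = AutoTokenizer.from_pretrained(mod)  # token="hf_..." for gated models
cha = [{"role": "user", "content": "U1"},
       {"role": "assistant", "content": "A1"},
       {"role": "user", "content": "U2"}]
# The rendered string exposes the special tokens (<s>, [INST], ...) that
# format_prompt's startOfString/template* defaults reproduce by hand.
print(tok.apply_chat_template(cha, tokenize=False, add_generation_prompt=True))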
@@ -82,8 +83,9 @@ def format_prompt0(message, history):
 #-------------------------------------------------------------------------
 
 def format_prompt(message, history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=False,
-
-                  startOfString="<
+                  startOfString="<s>", template0=" [INST] {system} [/INST] </s>", template1=" [INST] {message} [/INST]", template2=" {response}</s>"): # mistralai/Mixtral-8x7B-Instruct-v0.1
+                  #startOfString="<bos>", template0="<start_of_turn>user\n{system}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n", template1="<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n", template2="<end_of_turn>\n"): # google/gemma-2-2b-it
+                  #startOfString="", template0="<|start_header_id|>system<|end_header_id|>\n\n{system}\n<|eot_id|>", template1="<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>", template2="<|start_header_id|>assistant<|end_header_id|>\n\n{response}<|eot_id|>"): # meta-llama/Llama-3.1-8B-Instruct?
     if zeichenlimit is None: zeichenlimit=1000000000 # :-)
     prompt = ""
     if RAGAddon is not None:
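The rewritten signature makes the Mixtral chat format the default and keeps the Gemma-2 and Llama-3.1 formats as commented drop-in alternatives: template0 wraps the system message, template1 each user turn, template2 each assistant response, and startOfString prepends the BOS token. The body of format_prompt is unchanged by this commit and not shown here, so the following is only a plausible sketch of how such parameters compose a prompt, not the committed implementation:

# Sketch (assumption): how startOfString/template0/template1/template2
# plausibly combine into a single prompt string.
def compose_prompt(message, history=None, system=None,
                   startOfString="<s>",
                   template0=" [INST] {system} [/INST] </s>",
                   template1=" [INST] {message} [/INST]",
                   template2=" {response}</s>"):
    prompt = ""
    if system is not None:
        prompt += template0.format(system=system)     # system turn
    for user, response in (history or []):            # prior turns, oldest first
        prompt += template1.format(message=user)
        prompt += template2.format(response=response)
    prompt += template1.format(message=message)       # current user message
    return startOfString + prompt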
@@ -222,7 +224,7 @@ def add_doc(path, session):
     if(len(doc[0])>5):
         if(not "cuda" in device):
             doc="\n\n".join(doc[0][0:5])
-            gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (first 5 pages on CPU setups)!")
+            gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (demo-mode: first 5 pages on CPU setups)!")
         else:
             doc="\n\n".join(doc[0])
             gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing!")
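Context for this hunk: on hosts without CUDA, add_doc indexes only the first five pages of an uploaded PDF so CPU embedding stays responsive; the message change merely labels this as demo-mode. A standalone sketch of the guard; the device-detection line is an assumption, since only the test against device appears in the diff:

# Sketch: how a device string like the one tested above is typically set up.
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"  # assumption
pages = ["page 1 ...", "page 2 ..."]  # hypothetical per-page PDF extracts
if len(pages) > 5 and "cuda" not in device:
    pages = pages[:5]  # demo-mode: cap the excerpt so CPU embedding stays fast
doc = "\n\n".join(pages)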
@@ -254,105 +256,17 @@ def add_doc(path, session):
     print(len(x))
     if(len(x)==0):
         chunkSize=40000
-        for i in range(round(len(corpus)/chunkSize+0.5)):
+        for i in range(round(len(corpus)/chunkSize+0.5)):
             print("embed batch "+str(i)+" of "+str(round(len(corpus)/chunkSize+0.5)))
             ids=list(range(i*chunkSize,(i*chunkSize+chunkSize)))
             batch=corpus[i*chunkSize:(i*chunkSize+chunkSize)]
             textIDs=[str(id) for id in ids[0:len(batch)]]
-            ids=[str(id+len(x)+1) for id in ids[0:len(batch)]]
+            ids=[str(id+len(x)+1) for id in ids[0:len(batch)]]
             collection.add(documents=batch, ids=ids,
-                           metadatas=[{"date": str("2024-10-10")} for b in batch])
+                           metadatas=[{"date": str("2024-10-10")} for b in batch])
             print("finished batch "+str(i)+" of "+str(round(len(corpus)/40000+0.5)))
     now = datetime.now()
     gr.Info(f"Indexing complete!")
-    print(now-then)
+    print(now-then)
     return(collection)
 
-
-#--------------------------------------------------------
-# Function for response to user queries and pot. addenda
-#--------------------------------------------------------
-
-def multimodal_response(message, history, dropdown, hfToken, request: gr.Request):
-    print("def multimodal response!")
-    if(hfToken.startswith("hf_")): # use HF-hub with custom token if token is provided
-        inferenceClient = InferenceClient(model=myModel, token=hfToken)
-    else:
-        inferenceClient = InferenceClient(myModel)
-    global databases
-    if request:
-        session=request.session_hash
-    else:
-        session="0"
-    length=str(len(history))
-    print(databases)
-    if(not databases[-1][1]==session):
-        databases.append((date.today(),session))
-    #print(databases)
-    query=message["text"]
-    if(len(message["files"])>0): # is there at least one file attached?
-        collection=add_doc(message["files"][0], session)
-    else: # otherwise, you still want to get the collection with the session-based db
-        collection=add_doc(message["text"], session)
-    client = chromadb.PersistentClient(path=dbPath)
-    print(str(client.list_collections()))
-    x=collection.get(include=[])["ids"]
-    ragQuery=[format_prompt(query, history) if len(history)>0 else query]
-    context=collection.query(query_texts=ragQuery, n_results=3)
-    context=["<Kontext "+str(i)+"> "+str(c)+"</Kontext "+str(i)+">" for i,c in enumerate(context["documents"][0])]
-    gr.Info("Kontext:\n"+str(context))
-    generate_kwargs = dict(
-        temperature=float(0.9),
-        max_new_tokens=5000,
-        top_p=0.95,
-        repetition_penalty=1.0,
-        do_sample=True,
-        seed=42,
-    )
-    system="Mit Blick auf das folgende Gespräch und den relevanten Kontext, antworte auf die aktuelle Frage des Nutzers. "+\
-        "Antworte ausschließlich auf Basis der Informationen im Kontext.\n\nKontext:\n\n"+\
-        str("\n\n".join(context))
-    #"Given the following conversation, relevant context, and a follow up question, "+\
-    #"reply with an answer to the current question the user is asking. "+\
-    #"Return only your response to the question given the above information "+\
-    #"following the users instructions as needed.\n\nContext:"+\
-    print(system)
-    #formatted_prompt = format_prompt0(system+"\n"+query, history)
-    formatted_prompt = format_prompt(query, history, system=system)
-    print(formatted_prompt)
-    output = ""
-    try:
-        stream = inferenceClient.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-        for response in stream:
-            output += response.token.text
-            yield output
-    except Exception as e:
-        output = "Für weitere Antworten von der KI gebe bitte einen gültigen HuggingFace-Token an."
-        if(len(context)>0):
-            output += "\nBis dahin helfen dir hoffentlich die folgenden Quellen weiter:"
-        yield output
-        print(str(e))
-    if(len(context)>0):
-        output=output+"\n\n<br><details open><summary><strong>Quellen</strong></summary><br><ul>"+"".join(["<li>" + c + "</li>" for c in context])+"</ul></details>"
-        yield output
-
-#------------------------------
-# Launch Gradio-ChatInterface
-#------------------------------
-
-i=gr.ChatInterface(multimodal_response,
-    title="Frag dein PDF",
-    multimodal=True,
-    additional_inputs=[
-        gr.Dropdown(
-            info="Wähle eine Variante",
-            choices=["1","2","3"],
-            value="1",
-            label="Variante"),
-        gr.Textbox(
-            value="",
-            label="HF_token"),
-    ])
-i.launch() #allowed_paths=["."])
-
-
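The surviving half of this hunk is the batch indexer: the corpus is written to the ChromaDB collection in chunkSize-sized batches, round(len(corpus)/chunkSize+0.5) serves as a hand-rolled ceiling, and the string ids are offset by len(x)+1 so new entries cannot collide with ids already in the collection (textIDs is computed but never used here). A self-contained sketch of the same scheme; math.ceil avoids the empty extra batch that round(x+0.5) can schedule when the corpus length is an exact multiple of chunkSize:

# Sketch of the batch-indexing scheme above; `collection` stands in for a
# chromadb collection, everything else is plain Python.
import math

def index_in_batches(collection, corpus, n_existing, chunk_size=40000):
    n_batches = math.ceil(len(corpus) / chunk_size)   # robust ceiling
    for i in range(n_batches):
        batch = corpus[i*chunk_size:(i+1)*chunk_size]
        # Offset ids past the n_existing entries already stored, mirroring
        # ids=[str(id+len(x)+1) for id in ...] in the diff.
        ids = [str(n_existing + 1 + i*chunk_size + j) for j in range(len(batch))]
        collection.add(documents=batch, ids=ids,
                       metadatas=[{"date": "2024-10-10"} for _ in batch])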
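The rest of the hunk deletes multimodal_response and the gr.ChatInterface launch from run.py entirely (the header -254,105 +256,17 shows the shrinkage). For reference, the deleted function was a standard retrieve-then-generate loop; its German system prompt tells the model to answer the user's current question using only the supplied context, and the fallback message asks for a valid HuggingFace token and points the user to the retrieved sources. A condensed sketch of that flow, reusing myModel and format_prompt from the file above (an illustration, not the committed code):

# Condensed sketch of the deleted retrieve-then-generate flow.
from huggingface_hub import InferenceClient

def respond(query, history, collection, hf_token=None):
    client = InferenceClient(myModel, token=hf_token)  # myModel as defined above
    # Use the history-aware formatted prompt as the retrieval query, so that
    # follow-up questions still retrieve relevant chunks.
    rag_query = format_prompt(query, history) if history else query
    hits = collection.query(query_texts=[rag_query], n_results=3)
    context = ["<Kontext "+str(i)+"> "+str(c)+"</Kontext "+str(i)+">"
               for i, c in enumerate(hits["documents"][0])]
    # German system prompt, as in the deleted code: "answer the user's current
    # question based solely on the information in the context".
    system = ("Mit Blick auf das folgende Gespräch und den relevanten Kontext, "
              "antworte auf die aktuelle Frage des Nutzers. Antworte ausschließlich "
              "auf Basis der Informationen im Kontext.\n\nKontext:\n\n"
              + "\n\n".join(context))
    prompt = format_prompt(query, history, system=system)  # from run.py above
    # Stream the completion token by token, as the deleted code did.
    for chunk in client.text_generation(prompt, temperature=0.9, top_p=0.95,
                                        max_new_tokens=5000, do_sample=True,
                                        seed=42, stream=True, details=True,
                                        return_full_text=False):
        yield chunk.token.text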