pminervini committed on
Commit
8400fbd
1 Parent(s): 45b79c0
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import os
2
  import gradio as gr
3
- from transformers import pipeline
4
  from elasticsearch import Elasticsearch
5
 
 
6
  # Connect to Elasticsearch
7
  es = Elasticsearch(hosts=["https://data.neuralnoise.com:9200"],
8
  basic_auth=('elastic', os.environ['ES_PASSWORD']),
@@ -58,8 +59,18 @@ def rag_pipeline(prompt, index="pubmed", num_docs=3, model_name="HuggingFaceH4/z
58
  for message in messages:
59
  print('MSG', message)
60
 
 
 
 
 
 
 
 
 
 
 
61
  # Generate response using the LLM
62
- response = generator(messages, max_new_tokens=64, return_full_text=False)
63
 
64
  # Return the generated text and the documents
65
  return response[0]['generated_text'], joined_docs
 
1
  import os
2
  import gradio as gr
3
+ from transformers import pipeline, StoppingCriteriaList, MaxTimeCriteria
4
  from elasticsearch import Elasticsearch
5
 
6
+
7
  # Connect to Elasticsearch
8
  es = Elasticsearch(hosts=["https://data.neuralnoise.com:9200"],
9
  basic_auth=('elastic', os.environ['ES_PASSWORD']),
 
59
  for message in messages:
60
  print('MSG', message)
61
 
62
+ # Define the stopping criteria using MaxTimeCriteria
63
+ stopping_criteria = StoppingCriteriaList([MaxTimeCriteria(32)])
64
+
65
+ # Define the generation_kwargs with stopping criteria
66
+ generation_kwargs = {
67
+ "max_new_tokens": 128,
68
+ "generation_kwargs": {"stopping_criteria": stopping_criteria},
69
+ "return_full_text": False
70
+ }
71
+
72
  # Generate response using the LLM
73
+ response = generator(messages, **generation_kwargs)
74
 
75
  # Return the generated text and the documents
76
  return response[0]['generated_text'], joined_docs