pminervini committed on
Commit
6cbdb81
1 Parent(s): 3e8dc72
Files changed (1)
  1. app.py +37 -25
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
 
 import torch
 from transformers import pipeline, StoppingCriteriaList, MaxTimeCriteria
+import openai
+
 from elasticsearch import Elasticsearch
 
 
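Note: the new `import openai` targets the legacy (pre-1.0) Python client, whose module-level `Completion` API is used further down. That client needs an API key; a minimal setup sketch (the explicit env-var lookup is an assumption, not part of this commit):

```python
import os
import openai

# Legacy (pre-1.0) openai client: the module reads OPENAI_API_KEY from the
# environment by default; setting it explicitly on the module is equivalent.
openai.api_key = os.environ.get("OPENAI_API_KEY")
```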
@@ -16,22 +18,20 @@ def search(query, index="pubmed", num_docs=3):
     Search the Elasticsearch index for the most relevant documents.
     """
 
-    print(f'Running query: {query}')
-
-    es_request_body = {
-        "query": {
-            "match": {
-                "content": query  # Assuming documents have a 'content' field
-            }
-        }, "size": num_docs
-    }
-
-    response = es.options(request_timeout=60).search(index=index, body=es_request_body)
-
-    # Extract and return the documents
-    docs = [hit["_source"]["content"] for hit in response['hits']['hits']]
-
-    print(f'Received {len(docs)} documents from index {index}')
+    docs = []
+    if num_docs > 0:
+        print(f'Running query: {query}')
+        es_request_body = {
+            "query": {
+                "match": {
+                    "content": query  # Assuming documents have a 'content' field
+                }
+            }, "size": num_docs
+        }
+        response = es.options(request_timeout=60).search(index=index, body=es_request_body)
+        # Extract and return the documents
+        docs = [hit["_source"]["content"] for hit in response['hits']['hits']]
+        print(f'Received {len(docs)} documents from index {index}')
 
     return docs
 
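The `num_docs > 0` guard makes retrieval optional: when zero documents are requested, the Elasticsearch round-trip is skipped entirely and `search` returns an empty list, so the pipeline degrades to a plain, retrieval-free LLM call. A quick sketch of that contract, using the names from the hunk above:

```python
# With num_docs=0, no Elasticsearch request is issued and the result is empty.
docs = search("Are ILC2s increased in nasal polyps?", index="pubmed", num_docs=0)
assert docs == []

# With num_docs > 0, the match query on the 'content' field runs as before.
docs = search("Are ILC2s increased in nasal polyps?", index="pubmed", num_docs=3)
assert len(docs) <= 3
```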
@@ -67,25 +67,37 @@ def rag_pipeline(prompt, index="pubmed", num_docs=3, model_name="HuggingFaceH4/z
     # Define the generation_kwargs with stopping criteria
     generation_kwargs = {
         "max_new_tokens": 128,
-        "generation_kwargs": {"stopping_criteria": stopping_criteria},
+        # "generation_kwargs": {"stopping_criteria": stopping_criteria},
         "return_full_text": False
     }
 
-    # Generate response using the LLM
-    response = generator(messages, **generation_kwargs)
-
-    print('RESPONSE', response)
+    if model_name.startswith('openai/'):
+        openai_model_name = model_name.split('/')[1]
+        openai_prompt = '\n\n'.join([m['content'] for m in messages])
+
+        openai_res = openai.Completion.create(model=openai_model_name,
+                                              prompt=openai_prompt,
+                                              max_tokens=generation_kwargs["max_new_tokens"],
+                                              n=1,
+                                              stop=None,
+                                              temperature=0)
+        response = openai_res.choices[0].text.strip()
+    else:
+        # Generate response using the HF LLM
+        hf_response = generator(messages, **generation_kwargs)
+        print('HF_RESPONSE', hf_response)
+        response = hf_response[0]['generated_text']
 
     # Return the generated text and the documents
-    return response[0]['generated_text'], joined_docs
+    return response, joined_docs
 
 # Create the Gradio interface
 iface = gr.Interface(fn=rag_pipeline,
                      inputs=[
                          gr.Textbox(label="Input Prompt", value="Are group 2 innate lymphoid cells (ILC2s) increased in chronic rhinosinusitis with nasal polyps or eosinophilia?"),
-                         gr.Dropdown(label="Index", choices=["pubmed", "wikipedia", "textbooks"], value="pubmed"),
-                         gr.Number(label="Number of Documents", value=3, step=1, minimum=1, maximum=10),
-                         gr.Dropdown(label="Model", choices=["HuggingFaceH4/zephyr-7b-beta", "meta-llama/Llama-2-7b-chat-hf", "meta-llama/Llama-2-13b-chat-hf", "meta-llama/Llama-2-70b-chat-hf"], value="HuggingFaceH4/zephyr-7b-beta")
+                         gr.Dropdown(label="Index", choices=["pubmed", "wikipedia", "textbooks"], value="pubmed"),
+                         gr.Number(label="Number of Documents", value=3, step=1, minimum=0, maximum=10),
+                         gr.Dropdown(label="Model", choices=["HuggingFaceH4/zephyr-7b-beta", "meta-llama/Llama-2-7b-chat-hf", "meta-llama/Llama-2-13b-chat-hf", "meta-llama/Llama-2-70b-chat-hf", "openai/gpt-3.5-turbo"], value="HuggingFaceH4/zephyr-7b-beta")
                      ],
                      outputs=[
                          gr.Textbox(label="Generated Text"),
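Two things are worth noting in this hunk. First, the `stopping_criteria` entry (built from the still-imported `MaxTimeCriteria`) is now commented out, so only `max_new_tokens` bounds HF generation. Second, in the legacy client `openai.Completion.create` calls the v1/completions endpoint, which rejects chat-only models such as the `gpt-3.5-turbo` option added below; the chat endpoint would be the safer target. A sketch of that variant, assuming `messages` already follows the `{"role": ..., "content": ...}` chat schema (the diff only confirms each entry has a `'content'` key):

```python
# Sketch only (not what this commit does): legacy chat endpoint for
# chat-only models such as gpt-3.5-turbo.
openai_res = openai.ChatCompletion.create(model=openai_model_name,
                                          messages=messages,
                                          max_tokens=generation_kwargs["max_new_tokens"],
                                          n=1,
                                          temperature=0)
response = openai_res.choices[0].message.content.strip()
```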
 
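Taken together, the commit makes the backend selectable and the retrieval step optional through the same `rag_pipeline` entry point. Illustrative calls, as a sketch: the argument names come from the signature in the hunk header above, and the returned tuple shape from the new `return response, joined_docs`:

```python
# HF backend with retrieval enabled (the defaults).
text, docs = rag_pipeline("Are ILC2s increased in nasal polyps?",
                          index="pubmed", num_docs=3,
                          model_name="HuggingFaceH4/zephyr-7b-beta")

# OpenAI backend with retrieval switched off via the new num_docs=0 path.
text, docs = rag_pipeline("Are ILC2s increased in nasal polyps?",
                          index="pubmed", num_docs=0,
                          model_name="openai/gpt-3.5-turbo")
```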