pminervini committed
Commit 238b842
Parent(s): a98fabb

update
app.py CHANGED
@@ -1,10 +1,8 @@
 import os
 import gradio as gr
 
-import vllm
-
 import torch
-from transformers import pipeline, StoppingCriteria, StoppingCriteriaList, MaxTimeCriteria, AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizer
+from transformers import pipeline, StoppingCriteria, StoppingCriteriaList, MaxTimeCriteria, AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizer, BitsAndBytesConfig
 from openai import OpenAI
 
 from elasticsearch import Elasticsearch
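Note: this hunk drops the module-level import of vllm, so the Space no longer pays vllm's heavy, GPU-oriented import cost (and possible import failure on CPU-only hardware) at startup, and it adds BitsAndBytesConfig to the transformers imports for the 4-bit loading introduced in the final hunk below. A minimal sketch of the deferred-import pattern being adopted; the function name and the stand-in module are illustrative, not from this repo:

    def heavy_analysis(text: str) -> str:
        # Importing inside the function body means the module is only loaded
        # on the first call; Python caches it in sys.modules afterwards, so
        # repeated calls do not re-pay the import cost.
        import json  # stand-in for a heavy dependency such as vllm
        return json.dumps({"text": text})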
@@ -59,6 +57,7 @@ def search(query, index="pubmed", num_docs=3):
     return docs
 
 def analyse(reference: str, passage: str) -> str:
+    import vllm
     fava_input = "Read the following references:\n{evidence}\nPlease identify all the errors in the following text using the information in the references provided and suggest edits if necessary:\n[Text] {output}\n[Edited] "
     prompt = [fava_input.format_map({"evidence": reference, "output": passage})]
 
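With the import moved inside analyse, vllm is only loaded the first time a passage is actually analysed. The diff cuts off before the model call; below is a sketch of how the body might continue, assuming the FAVA-style editing model is served through vllm's offline LLM API. The model id and sampling settings are assumptions, not part of this commit:

    def analyse(reference: str, passage: str) -> str:
        import vllm  # deferred, as in this commit

        fava_input = "Read the following references:\n{evidence}\nPlease identify all the errors in the following text using the information in the references provided and suggest edits if necessary:\n[Text] {output}\n[Edited] "
        prompt = [fava_input.format_map({"evidence": reference, "output": passage})]
        # Assumed continuation: load the editing model and generate the
        # corrected passage (model id and parameters are hypothetical).
        model = vllm.LLM(model="fava-uw/fava-model")
        sampling_params = vllm.SamplingParams(temperature=0.0, max_tokens=1024)
        outputs = model.generate(prompt, sampling_params)
        return outputs[0].outputs[0].text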
|
@@ -122,7 +121,9 @@ def rag_pipeline(prompt, index="pubmed", num_docs=3, model_name="HuggingFaceH4/z
         print('OAI_RESPONSE', openai_res)
         response = openai_res.choices[0].message.content.strip()
     else:
-
+        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True, quantization_config=quantization_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         # Load your language model from HuggingFace Transformers
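The new else branch loads the local model in 4-bit via bitsandbytes, which cuts weight memory roughly four-fold relative to bf16 at some quality cost, typically enough to fit a 7B model on a modest GPU. A self-contained sketch of the same loading recipe; the model id is a placeholder, and bnb_4bit_compute_dtype is an optional knob that this commit does not set:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    model_name = "some-org/some-causal-lm"  # placeholder, not from this commit
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # pack weights to 4 bits on load
        bnb_4bit_compute_dtype=torch.bfloat16,  # optional: run matmuls in bf16
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,   # dtype for the non-quantized modules
        device_map="auto",            # place layers across available devices
        low_cpu_mem_usage=True,       # avoid materialising a full CPU copy
        quantization_config=quantization_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)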