pminervini committed
Commit 238b842
1 Parent(s): a98fabb
Files changed (1):
  1. app.py +5 -4

app.py CHANGED
@@ -1,10 +1,8 @@
 import os
 import gradio as gr
 
-import vllm
-
 import torch
-from transformers import pipeline, StoppingCriteria, StoppingCriteriaList, MaxTimeCriteria, AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizer
+from transformers import pipeline, StoppingCriteria, StoppingCriteriaList, MaxTimeCriteria, AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizer, BitsAndBytesConfig
 from openai import OpenAI
 
 from elasticsearch import Elasticsearch
@@ -59,6 +57,7 @@ def search(query, index="pubmed", num_docs=3):
     return docs
 
 def analyse(reference: str, passage: str) -> str:
+    import vllm
     fava_input = "Read the following references:\n{evidence}\nPlease identify all the errors in the following text using the information in the references provided and suggest edits if necessary:\n[Text] {output}\n[Edited] "
     prompt = [fava_input.format_map({"evidence": reference, "output": passage})]
 
@@ -122,7 +121,9 @@ def rag_pipeline(prompt, index="pubmed", num_docs=3, model_name="HuggingFaceH4/z
         print('OAI_RESPONSE', openai_res)
         response = openai_res.choices[0].message.content.strip()
     else:
-        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True, load_in_4bit=True)
+        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True, quantization_config=quantization_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         # Load your language model from HuggingFace Transformers
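The substantive change is the 4-bit loading path: newer transformers releases deprecate passing load_in_4bit directly to from_pretrained in favour of an explicit BitsAndBytesConfig. A minimal standalone sketch of the pattern this commit adopts, assuming a bitsandbytes-capable GPU host; the checkpoint name is illustrative, since the diff truncates the real default:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hypothetical checkpoint; the commit's actual default is cut off in the
# hunk header above ("HuggingFaceH4/z...").
model_name = "HuggingFaceH4/zephyr-7b-beta"

# Replaces the deprecated from_pretrained(..., load_in_4bit=True) form.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,   # non-quantized modules kept in bf16
    device_map="auto",            # let accelerate place layers on available devices
    low_cpu_mem_usage=True,       # avoid materialising a full-precision copy on CPU
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Both forms load the same 4-bit weights; the config object is simply the supported way to express the setting and leaves room for further options (compute dtype, quantization type, double quantization).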
 
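The other change moves import vllm from module scope into analyse(). Presumably this defers loading a heavy, GPU-oriented dependency until the function actually runs, so the Space can start (and the OpenAI code path can work) even where vllm is slow or unavailable to import. A sketch of the same deferred-import pattern:

def analyse(reference: str, passage: str) -> str:
    # Deferred import: the dependency is only loaded on the code path
    # that needs it, keeping module import and app startup cheap.
    import vllm
    ...

The trade-off is that a missing vllm install now surfaces as an ImportError on the first call to analyse() rather than at startup.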