shalomma committed on
Commit
801051f
·
1 Parent(s): 77b6161

drop 8-bit arg (`load_in_8bit`)

Browse files
Files changed (1) hide show
  1. handler.py +1 -1
handler.py CHANGED
@@ -11,7 +11,7 @@ MODEL = 'decapoda-research/llama-7b-hf'
11
  class EndpointHandler():
12
  def __init__(self, path=""):
13
  self.tokenizer = llama.LLaMATokenizer.from_pretrained(MODEL)
14
- self.model = llama.LLaMAForCausalLM.from_pretrained(MODEL, low_cpu_mem_usage=True, load_in_8bit=True)
15
  self.model.to('cuda')
16
 
17
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
 
11
  class EndpointHandler():
12
  def __init__(self, path=""):
13
  self.tokenizer = llama.LLaMATokenizer.from_pretrained(MODEL)
14
+ self.model = llama.LLaMAForCausalLM.from_pretrained(MODEL, low_cpu_mem_usage=True)
15
  self.model.to('cuda')
16
 
17
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: