robert committed on
Commit
8a84eb2
·
1 Parent(s): 146bc3a

Changing the model loading to 4-bit

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -18,8 +18,9 @@ from transformers import (
18
  )
19
 
20
  tokenizer = AutoTokenizer.from_pretrained("ContextualAI/archangel_sft-kto_llama13b")
21
- model = AutoModelForCausalLM.from_pretrained("ContextualAI/archangel_sft-kto_llama13b")
22
- model = model.to("cuda:0")
 
23
 
24
 
25
  class OAAPIKey(BaseModel):
@@ -44,7 +45,7 @@ class StopOnSequence(StoppingCriteria):
44
  return False
45
  return (
46
  (
47
- input_ids[0, -self.sequence_len:]
48
  == torch.tensor(self.sequence_ids, device=input_ids.device)
49
  )
50
  .all()
@@ -52,7 +53,7 @@ class StopOnSequence(StoppingCriteria):
52
  )
53
 
54
 
55
- @spaces.GPU(duration=42)
56
  def spaces_model_predict(message: str, history: list[tuple[str, str]]):
57
  history_transformer_format = history + [[message, ""]]
58
  stop = StopOnSequence("<|human|>", tokenizer)
@@ -135,7 +136,7 @@ with gr.Blocks() as demo:
135
  label="Please enter your message",
136
  interactive=True,
137
  multiselect=False,
138
- allow_custom_value=True
139
  )
140
 
141
  with gr.Row():
 
18
  )
19
 
20
  tokenizer = AutoTokenizer.from_pretrained("ContextualAI/archangel_sft-kto_llama13b")
21
+ model = AutoModelForCausalLM.from_pretrained(
22
+ "ContextualAI/archangel_sft-kto_llama13b", device_map="auto", load_in_4bit=True
23
+ )
24
 
25
 
26
  class OAAPIKey(BaseModel):
 
45
  return False
46
  return (
47
  (
48
+ input_ids[0, -self.sequence_len :]
49
  == torch.tensor(self.sequence_ids, device=input_ids.device)
50
  )
51
  .all()
 
53
  )
54
 
55
 
56
+ @spaces.GPU(duration=54)
57
  def spaces_model_predict(message: str, history: list[tuple[str, str]]):
58
  history_transformer_format = history + [[message, ""]]
59
  stop = StopOnSequence("<|human|>", tokenizer)
 
136
  label="Please enter your message",
137
  interactive=True,
138
  multiselect=False,
139
+ allow_custom_value=True,
140
  )
141
 
142
  with gr.Row():