Yash Sachdeva committed
Commit 18dd69a · 1 Parent(s): 5bc3efc
Files changed (1):
  1. question_paper.py +9 -6
question_paper.py CHANGED
@@ -10,24 +10,27 @@ app = FastAPI()
 MODEL = None
 TOKENIZER = None
 
-
 @app.get("/")
 def llama():
     text = "Hi, my name is "
-    inputs = TOKENIZER(text, return_tensors="pt")
-    outputs = MODEL.generate(**inputs, max_new_tokens=64)
+    inputs = TOKENIZER(text, return_tensors="pt").input_ids
+    outputs = MODEL.generate(
+        inputs,
+        max_length=256,
+        pad_token_id=TOKENIZER.pad_token_id,
+        eos_token_id=TOKENIZER.eos_token_id,
+    )
     tresponse = TOKENIZER.decode(outputs[0], skip_special_tokens=True)
     print(tresponse)
 
     return tresponse
 
-
 @app.on_event("startup")
 def init_model():
     global MODEL
     global TOKENIZER
     if not MODEL:
         print("loading model")
-        TOKENIZER = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-v1.0")
-        MODEL = AutoModelForCausalLM.from_pretrained("Upstage/SOLAR-10.7B-v1.0", device_map="auto", torch_dtype=torch.float16,)
+        TOKENIZER = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
+        MODEL = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
         print("loaded model")
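For a quick check of the changed endpoint, a minimal smoke test along these lines should work once the server is up (the requests client, the uvicorn launch command, and port 8000 are illustrative assumptions, not part of this commit):

    # Hypothetical smoke test for the GET "/" route in question_paper.py.
    # Assumes the server was started with: uvicorn question_paper:app --port 8000
    import requests

    resp = requests.get("http://localhost:8000/", timeout=600)  # generation can be slow on CPU
    resp.raise_for_status()
    print(resp.json())  # the decoded completion string returned by llama()

Two things worth noting about the new code: the replacement from_pretrained call drops device_map="auto" and torch_dtype=torch.float16, so the model now loads in full float32 precision on CPU unless those arguments are reintroduced; and if the Starling tokenizer ships without a pad token, TOKENIZER.pad_token_id will be None, in which case generate() falls back to the EOS id with a warning.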