yuhuili committed on
Commit
21ca00e
1 Parent(s): c2dc54b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -97,6 +97,12 @@ def warmup(model):
97
  @spaces.GPU(duration=60)
98
  def bot(history, temperature, top_p, use_EaInfer, highlight_EaInfer,session_state,):
99
  model.cuda()
 
 
 
 
 
 
100
  if not history:
101
  return history, "0.00 tokens/s", "0.00", session_state
102
  pure_history = session_state.get("pure_history", [])
@@ -259,7 +265,7 @@ parser.add_argument("--model-type", type=str, default="llama-3-instruct",choices
259
  parser.add_argument(
260
  "--total-token",
261
  type=int,
262
- default=59,
263
  help="The maximum number of new generated tokens.",
264
  )
265
  parser.add_argument(
 
97
  @spaces.GPU(duration=60)
98
  def bot(history, temperature, top_p, use_EaInfer, highlight_EaInfer,session_state,):
99
  model.cuda()
100
+ warmup_id = torch.tensor([[0,1]]).cuda()
101
+ warmup_hidden= torch.randn(1,2,model.base_model.config.hidden_size).half().cuda()
102
+ out=model.base_model(warmup_id)
103
+ out0=model.ea_layer(warmup_hidden,warmup_id)
104
+ torch.cuda.synchronize()
105
+ del out,out0,warmup_id,warmup_hidden
106
  if not history:
107
  return history, "0.00 tokens/s", "0.00", session_state
108
  pure_history = session_state.get("pure_history", [])
 
265
  parser.add_argument(
266
  "--total-token",
267
  type=int,
268
+ default=64,
269
  help="The maximum number of new generated tokens.",
270
  )
271
  parser.add_argument(