muryshev committed on
Commit
f5355b8
1 Parent(s): 026d071

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -71,20 +71,20 @@ stop_generation = False
71
  def generate_tokens(model, generator):
72
  global stop_generation
73
  app.logger.info('generate_tokens started')
74
- #with lock:
75
- try:
76
- for token in generator:
77
- if token == model.token_eos() or stop_generation:
78
- stop_generation = False
79
- app.logger.info('Abort generating')
80
- yield b'' # End of chunk
81
- break
82
-
83
- token_str = model.detokenize([token])#.decode("utf-8", errors="ignore")
84
- yield token_str
85
- except Exception as e:
86
- app.logger.info('generator exception')
87
- yield b'' # End of chunk
88
 
89
  @app.route('/stop_generation', methods=['GET'])
90
  def handler_stop_generation():
@@ -133,7 +133,7 @@ def generate_search_request():
133
  logits_all=True,
134
  #n_threads=12,
135
  verbose=True,
136
- n_gpu_layers=40,
137
  n_gqa=8 #must be set for 70b models
138
  )
139
 
@@ -183,7 +183,7 @@ def generate_response():
183
  logits_all=True,
184
  #n_threads=12,
185
  verbose=True,
186
- n_gpu_layers=40,
187
  n_gqa=8 #must be set for 70b models
188
  )
189
 
@@ -239,4 +239,4 @@ def generate_response():
239
  return Response(generate_tokens(model, generator), content_type='text/plain', status=200, direct_passthrough=True)
240
 
241
  if __name__ == "__main__":
242
- app.run(host="0.0.0.0", port=7860, debug=False, threaded=True)
 
71
  def generate_tokens(model, generator):
72
  global stop_generation
73
  app.logger.info('generate_tokens started')
74
+ with lock:
75
+ try:
76
+ for token in generator:
77
+ if token == model.token_eos() or stop_generation:
78
+ stop_generation = False
79
+ app.logger.info('Abort generating')
80
+ yield b'' # End of chunk
81
+ break
82
+
83
+ token_str = model.detokenize([token])#.decode("utf-8", errors="ignore")
84
+ yield token_str
85
+ except Exception as e:
86
+ app.logger.info('generator exception')
87
+ yield b'' # End of chunk
88
 
89
  @app.route('/stop_generation', methods=['GET'])
90
  def handler_stop_generation():
 
133
  logits_all=True,
134
  #n_threads=12,
135
  verbose=True,
136
+ n_gpu_layers=30,
137
  n_gqa=8 #must be set for 70b models
138
  )
139
 
 
183
  logits_all=True,
184
  #n_threads=12,
185
  verbose=True,
186
+ n_gpu_layers=30,
187
  n_gqa=8 #must be set for 70b models
188
  )
189
 
 
239
  return Response(generate_tokens(model, generator), content_type='text/plain', status=200, direct_passthrough=True)
240
 
241
  if __name__ == "__main__":
242
+ app.run(host="0.0.0.0", port=7860, debug=False, threaded=False)