vlff李飞飞 committed
Commit
4ec8897
1 Parent(s): dc8d3c6
Files changed (1)
  1. qwen_agent/llm/qwen_oai.py +9 -9
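In short, this commit removes the global qmodel, tokenizer statements from create_chat_completion and predict: the model and tokenizer are now passed as explicit arguments, stored on the QwenChatAsOAI instance as self.qmodel and self.tokenizer, and supplied at every call site.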
qwen_agent/llm/qwen_oai.py CHANGED
@@ -301,8 +301,7 @@ def text_complete_last_message(history, stop_words_ids, gen_kwargs):
     return output
 
 
-def create_chat_completion(request: ChatCompletionRequest):
-    global qmodel, tokenizer
+def create_chat_completion(request: ChatCompletionRequest, qmodel, tokenizer):
 
     gen_kwargs = {}
     if request.temperature is not None:
@@ -325,7 +324,7 @@ def create_chat_completion(request: ChatCompletionRequest):
     if request.stream:
         if request.functions:
             raise Exception("Invalid request: Function calling is not yet implemented for stream mode.")
-        generate = predict(query, history, request.model, stop_words, gen_kwargs)
+        generate = predict(query, history, request.model, stop_words, gen_kwargs, qmodel, tokenizer)
         return generate
         # return EventSourceResponse(generate, media_type="text/event-stream")
 
@@ -365,9 +364,8 @@ def _dump_json(data: BaseModel, *args, **kwargs) -> str:
 
 
 def predict(
-    query: str, history: List[List[str]], model_id: str, stop_words: List[str], gen_kwargs: Dict,
+    query: str, history: List[List[str]], model_id: str, stop_words: List[str], gen_kwargs: Dict, qmodel, tokenizer
 ):
-    global qmodel, tokenizer
     choice_data = ChatCompletionResponseStreamChoice(
         index=0, delta=DeltaMessage(role="assistant"), finish_reason=None
     )
@@ -438,6 +436,8 @@ class QwenChatAsOAI(BaseChatModel):
             trust_remote_code=True,
             resume_download=True,
         )
+        self.qmodel = qmodel
+        self.tokenizer = tokenizer
 
     def _chat_stream(
         self,
@@ -448,7 +448,7 @@ class QwenChatAsOAI(BaseChatModel):
             messages=messages,
             stop=stop,
             stream=True)
-        response = create_chat_completion(_request)
+        response = create_chat_completion(_request, self.qmodel, self.tokenizer)
         # TODO: error handling
         for chunk in response:
             if hasattr(chunk.choices[0].delta, 'content'):
@@ -460,7 +460,7 @@ class QwenChatAsOAI(BaseChatModel):
         stop: Optional[List[str]] = None,
     ) -> str:
         _request = ChatCompletionRequest(model=self.model, messages=messages, stop=stop, stream=False)
-        response = create_chat_completion(_request)
+        response = create_chat_completion(_request, self.qmodel, self.tokenizer)
         # TODO: error handling
         return response.choices[0].message.content
 
@@ -469,10 +469,10 @@ class QwenChatAsOAI(BaseChatModel):
                    functions: Optional[List[Dict]] = None) -> Dict:
         if functions:
             _request = ChatCompletionRequest(model=self.model, messages=messages, functions=functions)
-            response = create_chat_completion(_request)
+            response = create_chat_completion(_request, self.qmodel, self.tokenizer)
         else:
             _request = ChatCompletionRequest(model=self.model, messages=messages)
-            response = create_chat_completion(_request)
+            response = create_chat_completion(_request, self.qmodel, self.tokenizer)
         # TODO: error handling
         return response.choices[0].message.model_dump()
 
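For context, a minimal self-contained sketch of the shape of this refactor. DummyModel, DummyTokenizer, Chat, and ask are illustrative stand-ins, not the real Qwen classes; only the pattern (module-level globals replaced by explicit parameters plus instance attributes) mirrors the commit.

# Sketch of the refactor pattern, with dummy stand-ins for the real
# Qwen model and tokenizer (hypothetical names, not from the repo).

class DummyModel:
    def generate(self, prompt: str) -> str:
        return f"echo: {prompt}"


class DummyTokenizer:
    pass


# Before this commit (the removed pattern): the function read globals.
#
#     def create_chat_completion(request):
#         global qmodel, tokenizer
#         ...

# After this commit: callers supply both dependencies explicitly.
def create_chat_completion(prompt: str, qmodel: DummyModel,
                           tokenizer: DummyTokenizer) -> str:
    return qmodel.generate(prompt)


class Chat:
    def __init__(self) -> None:
        # Loaded once and kept on the instance, mirroring the new
        # self.qmodel / self.tokenizer attributes in QwenChatAsOAI.
        self.qmodel = DummyModel()
        self.tokenizer = DummyTokenizer()

    def ask(self, prompt: str) -> str:
        # Every call site passes the handles through, as in the diff above.
        return create_chat_completion(prompt, self.qmodel, self.tokenizer)


if __name__ == "__main__":
    print(Chat().ask("hello"))  # prints "echo: hello"

Passing the handles explicitly means create_chat_completion and predict no longer depend on hidden module state, so they can serve more than one loaded model per process and are easier to test in isolation.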