Spaces: Running

vlff李飞飞 committed
Commit fc211c5 • 1 Parent(s): 0024d65

update oai

Browse files:
- browser_qwen/src/popup.html +3 -3
- qwen_agent/llm/qwen_oai.py +15 -18
browser_qwen/src/popup.html
CHANGED
@@ -112,9 +112,9 @@
     <!-- <iframe src=$popup_url style="height: 550px"></iframe>-->
     <div id="iframe_area" style="height: 570px"></div>
 
-    <h3>Customize Address:</h3>
-    <input type="text" id="addr" name="addr" class="input-text">
-    <button id="set_addr" class="upload_btn">Change</button>
+    <h3 style="display: none;">Customize Address:</h3>
+    <input type="text" id="addr" name="addr" class="input-text" style="display: none;">
+    <button id="set_addr" class="upload_btn" style="display: none;">Change</button>
 
     <script src="popup.js"></script>
   </body>
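Note: the three custom-address controls are hidden with inline `display: none;` rather than deleted, presumably so `popup.js` can keep attaching handlers to `#addr` and `#set_addr` without null checks; the elements stay in the DOM but the UI no longer shows them.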
qwen_agent/llm/qwen_oai.py
CHANGED
@@ -291,8 +291,8 @@ def text_complete_last_message(history, stop_words_ids, gen_kwargs):
             _stop_words_ids.append(s)
         stop_words_ids = _stop_words_ids
 
-    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
-    output = model.generate(input_ids, stop_words_ids=stop_words_ids, **gen_kwargs).tolist()[0]
+    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(qmodel.device)
+    output = qmodel.generate(input_ids, stop_words_ids=stop_words_ids, **gen_kwargs).tolist()[0]
     output = tokenizer.decode(output, errors="ignore")
     assert output.startswith(prompt)
     output = output[len(prompt):]
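Note: the two rewritten lines are the core of the raw-completion path: encode the prompt, generate with the globally loaded model (now named `qmodel`), then decode and strip the echoed prompt. A minimal sketch of that pattern, assuming a Qwen checkpoint loaded with `trust_remote_code=True` (so `generate` accepts Qwen-specific kwargs such as `stop_words_ids` via `gen_kwargs`); the helper name is illustrative:

import torch

def complete_text(qmodel, tokenizer, prompt: str, **gen_kwargs) -> str:
    # Encode the prompt and move the ids to whichever device the model is on.
    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(qmodel.device)
    # generate() returns prompt ids plus continuation ids as one sequence.
    output_ids = qmodel.generate(input_ids, **gen_kwargs).tolist()[0]
    text = tokenizer.decode(output_ids, errors="ignore")
    # The decoded text echoes the prompt, so return only the continuation.
    assert text.startswith(prompt)
    return text[len(prompt):]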
@@ -302,7 +302,7 @@ def text_complete_last_message(history, stop_words_ids, gen_kwargs):
 
 
 def create_chat_completion(request: ChatCompletionRequest):
-    global model, tokenizer
+    global qmodel, tokenizer
 
     gen_kwargs = {}
     if request.temperature is not None:
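Note: the `global model, tokenizer` declarations become `global qmodel, tokenizer` here and in `predict` below. The rename appears to track the new `self.model = model` attribute added to `QwenChatAsOAI.__init__` further down: `self.model` now carries the model name string passed to `ChatCompletionRequest`, while the loaded torch module lives in the module-level `qmodel`, keeping the two names from shadowing each other.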
@@ -333,7 +333,7 @@ def create_chat_completion(request: ChatCompletionRequest):
     if query is _TEXT_COMPLETION_CMD:
         response = text_complete_last_message(history, stop_words_ids=stop_words_ids, gen_kwargs=gen_kwargs)
     else:
-        response, _ = model.chat(
+        response, _ = qmodel.chat(
             tokenizer,
             query,
             history=history,
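Note: `chat` is the conversational entry point provided by Qwen's remote code; it returns the reply together with the updated history, which this branch discards. A hedged usage sketch (the query string and empty history are illustrative):

# history is a list of (query, response) pairs from earlier turns.
response, _history = qmodel.chat(
    tokenizer,
    "What is the capital of France?",
    history=[],
)
print(response)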
@@ -367,7 +367,7 @@ def _dump_json(data: BaseModel, *args, **kwargs) -> str:
 def predict(
     query: str, history: List[List[str]], model_id: str, stop_words: List[str], gen_kwargs: Dict,
 ):
-    global model, tokenizer
+    global qmodel, tokenizer
     choice_data = ChatCompletionResponseStreamChoice(
         index=0, delta=DeltaMessage(role="assistant"), finish_reason=None
     )
@@ -381,11 +381,8 @@ def predict(
     stop_words_ids = [tokenizer.encode(s) for s in stop_words] if stop_words else None
     if stop_words:
         # TODO: It's a little bit tricky to trim stop words in the stream mode.
-        raise Exception(
-            status_code=400,
-            detail="Invalid request: custom stop words are not yet supported for stream mode.",
-        )
-    response_generator = model.chat_stream(
+        raise Exception("Invalid request: custom stop words are not yet supported for stream mode.",)
+    response_generator = qmodel.chat_stream(
         tokenizer, query, history=history, stop_words_ids=stop_words_ids, **gen_kwargs
     )
     for new_response in response_generator:
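Note: the old multi-line `raise Exception(status_code=..., detail=...)` form (keyword arguments apparently left over from FastAPI's `HTTPException`) would itself fail with a `TypeError`, since `Exception` takes only positional arguments; collapsing it to a single message string fixes that. Below, a sketch of the delta loop `predict` runs over `chat_stream`: Qwen's `chat_stream` yields the full response-so-far on every step, so the streaming endpoint must emit only the newly generated suffix each time (the function name is illustrative):

def stream_deltas(qmodel, tokenizer, query, history):
    # chat_stream yields cumulative text ("He", "Hello", "Hello!", ...),
    # so track what was already sent and yield only the new part.
    sent = ""
    for so_far in qmodel.chat_stream(tokenizer, query, history=history):
        delta = so_far[len(sent):]
        sent = so_far
        if delta:
            yield delta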
@@ -420,7 +417,7 @@ def predict(
 class QwenChatAsOAI(BaseChatModel):
 
     def __init__(self, model: str, api_key: str, model_server: str):
-        self.
+        self.model = model
         super().__init__()
         tokenizer = AutoTokenizer.from_pretrained(
             self.checkpoint_path,
@@ -429,26 +426,26 @@ class QwenChatAsOAI(BaseChatModel):
         )
         device_map = "cpu"
         # device_map = "auto"
-        model = AutoModelForCausalLM.from_pretrained(
+        qmodel = AutoModelForCausalLM.from_pretrained(
             self.checkpoint_path,
             device_map=device_map,
             trust_remote_code=True,
             resume_download=True,
         ).eval()
 
-        model.generation_config = GenerationConfig.from_pretrained(
+        qmodel.generation_config = GenerationConfig.from_pretrained(
             self.checkpoint_path,
             trust_remote_code=True,
             resume_download=True,
         )
-
+
 
     def _chat_stream(
         self,
         messages: List[Dict],
         stop: Optional[List[str]] = None,
     ) -> Iterator[str]:
-        _request = ChatCompletionRequest(model=self.
+        _request = ChatCompletionRequest(model=self.model,
                                          messages=messages,
                                          stop=stop,
                                          stream=True)
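Note: the constructor now loads everything under the `qmodel` name. A condensed sketch of that loading pattern, with an illustrative checkpoint path standing in for `self.checkpoint_path`:

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

checkpoint = "Qwen/Qwen-7B-Chat"  # illustrative; the class uses self.checkpoint_path

tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
qmodel = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="cpu",      # the diff pins CPU; "auto" would shard across GPUs
    trust_remote_code=True,
    resume_download=True,  # resume partially downloaded weights
).eval()
qmodel.generation_config = GenerationConfig.from_pretrained(
    checkpoint, trust_remote_code=True, resume_download=True,
)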
@@ -463,7 +460,7 @@ class QwenChatAsOAI(BaseChatModel):
         messages: List[Dict],
         stop: Optional[List[str]] = None,
     ) -> str:
-        _request = ChatCompletionRequest(model=self.
+        _request = ChatCompletionRequest(model=self.model,
                                          messages=messages,
                                          stop=stop,
                                          stream=False)
@@ -475,12 +472,12 @@ class QwenChatAsOAI(BaseChatModel):
         messages: List[Dict],
         functions: Optional[List[Dict]] = None) -> Dict:
         if functions:
-            _request = ChatCompletionRequest(model=self.
+            _request = ChatCompletionRequest(model=self.model,
                                              messages=messages,
                                              functions=functions)
             response = create_chat_completion(_request)
         else:
-            _request = ChatCompletionRequest(model=self.
+            _request = ChatCompletionRequest(model=self.model,
                                              messages=messages)
             response = create_chat_completion(_request)
         # TODO: error handling
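Note: with `self.model` populated, every `ChatCompletionRequest` built by `QwenChatAsOAI` now carries a valid model name. A hedged end-to-end sketch of how these request objects reach the local server code (the message content is illustrative):

_request = ChatCompletionRequest(
    model="Qwen/Qwen-7B-Chat",  # illustrative; in the class this is self.model
    messages=[{"role": "user", "content": "Hello!"}],
    stream=False,
)
response = create_chat_completion(_request)
# response follows the OpenAI chat-completion schema assembled by this module.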