Spaces: Running

vlff李飞飞 committed
Commit fc211c5 • 1 Parent(s): 0024d65

update oai

Browse files:
- browser_qwen/src/popup.html +3 -3
- qwen_agent/llm/qwen_oai.py +15 -18
browser_qwen/src/popup.html
CHANGED
@@ -112,9 +112,9 @@
     <!-- <iframe src=$popup_url style="height: 550px"></iframe>-->
     <div id="iframe_area" style="height: 570px"></div>
 
-    <h3>Customize Address:</h3>
-    <input type="text" id="addr" name="addr" class="input-text">
-    <button id="set_addr" class="upload_btn">Change</button>
+    <h3 style="display: none;">Customize Address:</h3>
+    <input type="text" id="addr" name="addr" class="input-text" style="display: none;">
+    <button id="set_addr" class="upload_btn" style="display: none;">Change</button>
 
     <script src="popup.js"></script>
   </body>
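Note: the three custom-address controls are hidden with inline `display: none;` rather than deleted, presumably so `popup.js` can keep attaching handlers to `#addr` and `#set_addr` without null checks; the elements stay in the DOM but the UI no longer shows them.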
qwen_agent/llm/qwen_oai.py
CHANGED
@@ -291,8 +291,8 @@ def text_complete_last_message(history, stop_words_ids, gen_kwargs):
             _stop_words_ids.append(s)
         stop_words_ids = _stop_words_ids
 
-    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
-    output = model.generate(input_ids, stop_words_ids=stop_words_ids, **gen_kwargs).tolist()[0]
+    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(qmodel.device)
+    output = qmodel.generate(input_ids, stop_words_ids=stop_words_ids, **gen_kwargs).tolist()[0]
     output = tokenizer.decode(output, errors="ignore")
     assert output.startswith(prompt)
     output = output[len(prompt):]
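Note: the two rewritten lines are the core of the raw-completion path: encode the prompt, generate with the globally loaded model (now named `qmodel`), then decode and strip the echoed prompt. A minimal sketch of that pattern, assuming a Qwen checkpoint loaded with `trust_remote_code=True` (so `generate` accepts Qwen-specific kwargs such as `stop_words_ids` via `gen_kwargs`); the helper name is illustrative:

import torch

def complete_text(qmodel, tokenizer, prompt: str, **gen_kwargs) -> str:
    # Encode the prompt and move the ids to whichever device the model is on.
    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(qmodel.device)
    # generate() returns prompt ids plus continuation ids as one sequence.
    output_ids = qmodel.generate(input_ids, **gen_kwargs).tolist()[0]
    text = tokenizer.decode(output_ids, errors="ignore")
    # The decoded text echoes the prompt, so return only the continuation.
    assert text.startswith(prompt)
    return text[len(prompt):]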
@@ -302,7 +302,7 @@ def text_complete_last_message(history, stop_words_ids, gen_kwargs):
 
 
 def create_chat_completion(request: ChatCompletionRequest):
-    global model, tokenizer
+    global qmodel, tokenizer
 
     gen_kwargs = {}
     if request.temperature is not None:
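Note: the `global model, tokenizer` declarations become `global qmodel, tokenizer` here and in `predict` below. The rename appears to track the new `self.model = model` attribute added to `QwenChatAsOAI.__init__` further down: `self.model` now carries the model name string passed to `ChatCompletionRequest`, while the loaded torch module lives in the module-level `qmodel`, keeping the two names from shadowing each other.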
@@ -333,7 +333,7 @@ def create_chat_completion(request: ChatCompletionRequest):
     if query is _TEXT_COMPLETION_CMD:
         response = text_complete_last_message(history, stop_words_ids=stop_words_ids, gen_kwargs=gen_kwargs)
     else:
-        response, _ = model.chat(
+        response, _ = qmodel.chat(
             tokenizer,
             query,
             history=history,
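Note: `chat` is the conversational entry point provided by Qwen's remote code; it returns the reply together with the updated history, which this branch discards. A hedged usage sketch (the query string and empty history are illustrative):

# history is a list of (query, response) pairs from earlier turns.
response, _history = qmodel.chat(
    tokenizer,
    "What is the capital of France?",
    history=[],
)
print(response)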
@@ -367,7 +367,7 @@ def _dump_json(data: BaseModel, *args, **kwargs) -> str:
 def predict(
     query: str, history: List[List[str]], model_id: str, stop_words: List[str], gen_kwargs: Dict,
 ):
-    global model, tokenizer
+    global qmodel, tokenizer
     choice_data = ChatCompletionResponseStreamChoice(
         index=0, delta=DeltaMessage(role="assistant"), finish_reason=None
     )
@@ -381,11 +381,8 @@ def predict(
     stop_words_ids = [tokenizer.encode(s) for s in stop_words] if stop_words else None
     if stop_words:
         # TODO: It's a little bit tricky to trim stop words in the stream mode.
-        raise Exception(
-            status_code=400,
-            detail="Invalid request: custom stop words are not yet supported for stream mode.",
-        )
-    response_generator = model.chat_stream(
+        raise Exception("Invalid request: custom stop words are not yet supported for stream mode.",)
+    response_generator = qmodel.chat_stream(
         tokenizer, query, history=history, stop_words_ids=stop_words_ids, **gen_kwargs
     )
     for new_response in response_generator:
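Note: the old multi-line `raise Exception(status_code=..., detail=...)` form (keyword arguments apparently left over from FastAPI's `HTTPException`) would itself fail with a `TypeError`, since `Exception` takes only positional arguments; collapsing it to a single message string fixes that. Below, a sketch of the delta loop `predict` runs over `chat_stream`: Qwen's `chat_stream` yields the full response-so-far on every step, so the streaming endpoint must emit only the newly generated suffix each time (the function name is illustrative):

def stream_deltas(qmodel, tokenizer, query, history):
    # chat_stream yields cumulative text ("He", "Hello", "Hello!", ...),
    # so track what was already sent and yield only the new part.
    sent = ""
    for so_far in qmodel.chat_stream(tokenizer, query, history=history):
        delta = so_far[len(sent):]
        sent = so_far
        if delta:
            yield delta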
@@ -420,7 +417,7 @@ def predict(
 class QwenChatAsOAI(BaseChatModel):
 
     def __init__(self, model: str, api_key: str, model_server: str):
-        self.
+        self.model = model
         super().__init__()
         tokenizer = AutoTokenizer.from_pretrained(
             self.checkpoint_path,
@@ -429,26 +426,26 @@ class QwenChatAsOAI(BaseChatModel):
         )
         device_map = "cpu"
         # device_map = "auto"
-        model = AutoModelForCausalLM.from_pretrained(
+        qmodel = AutoModelForCausalLM.from_pretrained(
             self.checkpoint_path,
             device_map=device_map,
             trust_remote_code=True,
             resume_download=True,
         ).eval()
 
-        model.generation_config = GenerationConfig.from_pretrained(
+        qmodel.generation_config = GenerationConfig.from_pretrained(
             self.checkpoint_path,
             trust_remote_code=True,
             resume_download=True,
         )
-
+
 
     def _chat_stream(
         self,
         messages: List[Dict],
         stop: Optional[List[str]] = None,
     ) -> Iterator[str]:
-        _request = ChatCompletionRequest(model=self.
+        _request = ChatCompletionRequest(model=self.model,
                                          messages=messages,
                                          stop=stop,
                                          stream=True)
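Note: the constructor now loads everything under the `qmodel` name. A condensed sketch of that loading pattern, with an illustrative checkpoint path standing in for `self.checkpoint_path`:

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

checkpoint = "Qwen/Qwen-7B-Chat"  # illustrative; the class uses self.checkpoint_path

tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
qmodel = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="cpu",      # the diff pins CPU; "auto" would shard across GPUs
    trust_remote_code=True,
    resume_download=True,  # resume partially downloaded weights
).eval()
qmodel.generation_config = GenerationConfig.from_pretrained(
    checkpoint, trust_remote_code=True, resume_download=True,
)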
@@ -463,7 +460,7 @@ class QwenChatAsOAI(BaseChatModel):
         messages: List[Dict],
         stop: Optional[List[str]] = None,
     ) -> str:
-        _request = ChatCompletionRequest(model=self.
+        _request = ChatCompletionRequest(model=self.model,
                                          messages=messages,
                                          stop=stop,
                                          stream=False)
@@ -475,12 +472,12 @@ class QwenChatAsOAI(BaseChatModel):
         messages: List[Dict],
         functions: Optional[List[Dict]] = None) -> Dict:
         if functions:
-            _request = ChatCompletionRequest(model=self.
+            _request = ChatCompletionRequest(model=self.model,
                                              messages=messages,
                                              functions=functions)
             response = create_chat_completion(_request)
         else:
-            _request = ChatCompletionRequest(model=self.
+            _request = ChatCompletionRequest(model=self.model,
                                              messages=messages)
             response = create_chat_completion(_request)
         # TODO: error handling
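Note: with `self.model` populated, every `ChatCompletionRequest` built by `QwenChatAsOAI` now carries a valid model name. A hedged end-to-end sketch of how these request objects reach the local server code (the message content is illustrative):

_request = ChatCompletionRequest(
    model="Qwen/Qwen-7B-Chat",  # illustrative; in the class this is self.model
    messages=[{"role": "user", "content": "Hello!"}],
    stream=False,
)
response = create_chat_completion(_request)
# response follows the OpenAI chat-completion schema assembled by this module.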