Hansimov committed on
Commit
8697de8
1 Parent(s): 51ebd18

:boom: [Fix] slow tokenizer issue, and finish by stop_sequences

Browse files
messagers/message_composer.py CHANGED
@@ -152,7 +152,10 @@ class MessageComposer:
152
  # https://huggingface.co/openchat/openchat-3.5-0106
153
  # https://huggingface.co/01-ai/Yi-1.5-34B-Chat
154
  elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b", "yi-1.5-34b"]:
155
- tokenizer = AutoTokenizer.from_pretrained(self.model_fullname)
 
 
 
156
  self.merged_str = tokenizer.apply_chat_template(
157
  messages, tokenize=False, add_generation_prompt=True
158
  )
 
152
  # https://huggingface.co/openchat/openchat-3.5-0106
153
  # https://huggingface.co/01-ai/Yi-1.5-34B-Chat
154
  elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b", "yi-1.5-34b"]:
155
+ # https://discuss.huggingface.co/t/error-with-new-tokenizers-urgent/2847/5
156
+ tokenizer = AutoTokenizer.from_pretrained(
157
+ self.model_fullname, use_fast=False
158
+ )
159
  self.merged_str = tokenizer.apply_chat_template(
160
  messages, tokenize=False, add_generation_prompt=True
161
  )
networks/huggingface_streamer.py CHANGED
@@ -157,15 +157,15 @@ class HuggingfaceStreamer:
157
 
158
  content = self.parse_line(line)
159
 
160
- if content.strip() == self.stop_sequences:
161
- content_type = "Finished"
162
- logger.success("\n[Finished]")
163
- is_finished = True
164
- else:
165
- content_type = "Completions"
166
- if line_count == 1:
167
- content = content.lstrip()
168
- logger.back(content, end="")
169
 
170
  output = self.message_outputer.output(
171
  content=content, content_type=content_type
 
157
 
158
  content = self.parse_line(line)
159
 
160
+ # if content.strip() == self.stop_sequences:
161
+ # content_type = "Finished"
162
+ # logger.success("\n[Finished]")
163
+ # is_finished = True
164
+ # else:
165
+ content_type = "Completions"
166
+ if line_count == 1:
167
+ content = content.lstrip()
168
+ logger.back(content, end="")
169
 
170
  output = self.message_outputer.output(
171
  content=content, content_type=content_type