davidberenstein1957 HF staff committed on
Commit
4af6d10
·
1 Parent(s): f4ee32a

Refactor stop sequences in chat.py for improved template handling

Browse files
src/synthetic_dataset_generator/pipelines/chat.py CHANGED
@@ -130,11 +130,13 @@ if MAGPIE_PRE_QUERY_TEMPLATE == "llama3":
130
  " \n\n",
131
  ]
132
  elif MAGPIE_PRE_QUERY_TEMPLATE == "qwen2":
 
 
133
  _STOP_SEQUENCES = [
134
- "<|im_end|>",
135
- "<|im_start|>",
136
  "assistant",
137
- " \n",
138
  ]
139
 
140
 
 
130
  " \n\n",
131
  ]
132
  elif MAGPIE_PRE_QUERY_TEMPLATE == "qwen2":
133
+ _STOP_SEQUENCES = ["<|im_end|>", "<|im_start|>", "assistant", "\n\n"]
134
+ else:
135
  _STOP_SEQUENCES = [
136
+ "<|eot_id|>",
137
+ "<|start_header_id|>",
138
  "assistant",
139
+ " \n\n",
140
  ]
141
 
142