Commit
·
4af6d10
1
Parent(s):
f4ee32a
Refactor stop sequences in chat.py for improved template handling
Browse files
src/synthetic_dataset_generator/pipelines/chat.py
CHANGED
@@ -130,11 +130,13 @@ if MAGPIE_PRE_QUERY_TEMPLATE == "llama3":
|
|
130 |
" \n\n",
|
131 |
]
|
132 |
elif MAGPIE_PRE_QUERY_TEMPLATE == "qwen2":
|
|
|
|
|
133 |
_STOP_SEQUENCES = [
|
134 |
-
"<|im_end|>",
|
135 |
-
"<|im_start|>",
|
136 |
"assistant",
|
137 |
-
" \n",
|
138 |
]
|
139 |
|
140 |
|
|
|
130 |
" \n\n",
|
131 |
]
|
132 |
elif MAGPIE_PRE_QUERY_TEMPLATE == "qwen2":
|
133 |
+
_STOP_SEQUENCES = ["<|im_end|>", "<|im_start|>", "assistant", "\n\n"]
|
134 |
+
else:
|
135 |
_STOP_SEQUENCES = [
|
136 |
+
"<|eot_id|>",
|
137 |
+
"<|start_header_id|>",
|
138 |
"assistant",
|
139 |
+
" \n\n",
|
140 |
]
|
141 |
|
142 |
|