jianguozhang
committed on
Add explanations for why we have adjusted the SYSTEM prompt position for the xLAM_8x22b tokenizer.
For Mixtral-8x22b-inst-v0.1, move the "SYSTEM" prompt to the beginning to improve conversation flow.
Originally, it appeared toward the end of the prompt, disrupting natural interactions.
Original template:
```
<s>[INST] USER[/INST][TOOL_CALLS] ASSISTANT TOOL CALLS</s>[TOOL_RESULTS] OBSERVATIONS[/TOOL_RESULTS] USER[/INST] ASSISTANT</s>[AVAILABLE_TOOLS] LIST OF TOOLS[/AVAILABLE_TOOLS][INST] SYSTEM PROMPT\n\nUSER[/INST]
```
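For readers who want to see where the system prompt lands after this change, below is a minimal sketch (not part of this commit) using the Hugging Face `apply_chat_template` API. The model id and the example messages are illustrative assumptions; substitute your own xLAM 8x22b checkpoint.

```python
# Minimal sketch (illustrative, not from this commit): render a conversation
# and inspect where the SYSTEM prompt ends up in the final string.
from transformers import AutoTokenizer

# Assumed model id for illustration; point this at your local xLAM 8x22b checkpoint instead.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/xLAM-8x22b-r")

messages = [
    {"role": "system", "content": "You are a function-calling assistant."},
    {"role": "user", "content": "What's the weather in Palo Alto?"},
]

rendered = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# With the adjusted template, the system prompt should appear near the start of
# the rendered string rather than inside the final [INST] block.
print(rendered)
```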
example/xlam_chat_template_examples_11_21.ipynb
CHANGED
```
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -153,7 +153,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -279,9 +279,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "
-      "Updated tokenizer for /export/agentstudio-family/checkpoints//xlam_v1/xlam_8x7b_r\n",
-      "Updated tokenizer for /export/agentstudio-family/checkpoints//xlam_v1/xlam_8x22b_r\n"
+      "Tokenizer is already updated for xLAM 1.0 series: /export/agentstudio-family/checkpoints//xlam_v1/xlam_7b_r. Contact ❤❤❤Jianguo Zhang❤❤❤ for more details! \n"
      ]
     }
    ],
@@ -307,6 +305,10 @@
     " return \n",
     " \n",
     " checkpoint = os.path.join(BASE_XLAM_DIR, checkpoint)\n",
+    " \n",
+    " # For Mixtral-8x22b-inst-v0.1, move the \"SYSTEM\" prompt to the beginning to improve conversation flow. \n",
+    " # Originally, it appeared toward the end of the prompt, disrupting natural interactions.\n",
+    " # Original template: \"<s>[INST] USER[/INST][TOOL_CALLS] ASSISTANT TOOL CALLS</s>[TOOL_RESULTS] OBSERVATIONS[/TOOL_RESULTS] USER[/INST] ASSISTANT</s>[AVAILABLE_TOOLS] LIST OF TOOLS[/AVAILABLE_TOOLS][INST] SYSTEM PROMPT\\n\\nUSER[/INST]\"\"\n",
     " if \"8x22b\" in checkpoint:\n",
     " original_tokenize_config = open_json(os.path.join(checkpoint, \"original_tokenizer_config_fixed.json\"))\n",
     " else:\n",
@@ -333,7 +335,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1437,7 +1439,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
```