diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb"
--- "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb"
+++ "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb"
@@ -3,13 +3,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "e6bf5eac",
+   "id": "83b29667",
    "metadata": {
     "papermill": {
-     "duration": 0.005682,
-     "end_time": "2023-09-14T02:37:04.293470",
+     "duration": 0.004177,
+     "end_time": "2023-09-14T04:09:42.595329",
      "exception": false,
-     "start_time": "2023-09-14T02:37:04.287788",
+     "start_time": "2023-09-14T04:09:42.591152",
      "status": "completed"
     },
     "tags": []
@@ -25,13 +25,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "f59fa274",
+   "id": "e81e130e",
    "metadata": {
     "papermill": {
-     "duration": 0.003026,
-     "end_time": "2023-09-14T02:37:04.300149",
+     "duration": 0.00248,
+     "end_time": "2023-09-14T04:09:42.600743",
      "exception": false,
-     "start_time": "2023-09-14T02:37:04.297123",
+     "start_time": "2023-09-14T04:09:42.598263",
      "status": "completed"
     },
     "tags": []
@@ -43,19 +43,19 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "b9505f51",
+   "id": "30eb35b6",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-14T02:37:04.308476Z",
-     "iopub.status.busy": "2023-09-14T02:37:04.307936Z",
-     "iopub.status.idle": "2023-09-14T02:37:05.309488Z",
-     "shell.execute_reply": "2023-09-14T02:37:05.308127Z"
+     "iopub.execute_input": "2023-09-14T04:09:42.604796Z",
+     "iopub.status.busy": "2023-09-14T04:09:42.604323Z",
+     "iopub.status.idle": "2023-09-14T04:09:43.475229Z",
+     "shell.execute_reply": "2023-09-14T04:09:43.474410Z"
     },
     "papermill": {
-     "duration": 1.008439,
-     "end_time": "2023-09-14T02:37:05.311918",
+     "duration": 0.874952,
+     "end_time": "2023-09-14T04:09:43.477084",
      "exception": false,
-     "start_time": "2023-09-14T02:37:04.303479",
+     "start_time": "2023-09-14T04:09:42.602132",
      "status": "completed"
     },
     "tags": []
@@ -83,19 +83,19 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "8d16737a",
+   "id": "62846b47",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-14T02:37:05.320892Z",
-     "iopub.status.busy": "2023-09-14T02:37:05.319751Z",
-     "iopub.status.idle": "2023-09-14T02:37:08.625564Z",
-     "shell.execute_reply": "2023-09-14T02:37:08.624420Z"
+     "iopub.execute_input": "2023-09-14T04:09:43.484470Z",
+     "iopub.status.busy": "2023-09-14T04:09:43.484106Z",
+     "iopub.status.idle": "2023-09-14T04:09:45.594235Z",
+     "shell.execute_reply": "2023-09-14T04:09:45.593478Z"
     },
     "papermill": {
-     "duration": 3.312981,
-     "end_time": "2023-09-14T02:37:08.627991",
+     "duration": 2.115687,
+     "end_time": "2023-09-14T04:09:45.595985",
      "exception": false,
-     "start_time": "2023-09-14T02:37:05.315010",
+     "start_time": "2023-09-14T04:09:43.480298",
      "status": "completed"
     },
     "tags": []
@@ -118,19 +118,19 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "id": "157915c9",
+   "id": "8b76286e",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-14T02:37:08.636451Z",
-     "iopub.status.busy": "2023-09-14T02:37:08.635808Z",
-     "iopub.status.idle": "2023-09-14T02:37:08.646285Z",
-     "shell.execute_reply": "2023-09-14T02:37:08.644957Z"
+     "iopub.execute_input": "2023-09-14T04:09:45.603947Z",
+     "iopub.status.busy": "2023-09-14T04:09:45.603582Z",
+     "iopub.status.idle": "2023-09-14T04:09:45.612565Z",
+     "shell.execute_reply": "2023-09-14T04:09:45.611873Z"
     },
     "papermill": {
-     "duration": 0.017165,
-     "end_time": "2023-09-14T02:37:08.648279",
+     "duration": 0.01509,
+     "end_time": "2023-09-14T04:09:45.614344",
      "exception": false,
-     "start_time": "2023-09-14T02:37:08.631114",
+     "start_time": "2023-09-14T04:09:45.599254",
      "status": "completed"
     },
     "tags": []
@@ -197,20 +197,18 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "ed6bf7ff",
+   "id": "bf170d72",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-14T02:37:08.656415Z",
-     "iopub.status.busy": "2023-09-14T02:37:08.655852Z",
-     "iopub.status.idle": "2023-09-14T02:37:32.430048Z",
-     "shell.execute_reply": "2023-09-14T02:37:32.428850Z"
+     "iopub.execute_input": "2023-09-14T04:09:45.622383Z",
+     "iopub.status.busy": "2023-09-14T04:09:45.621825Z"
     },
     "papermill": {
-     "duration": 23.781188,
-     "end_time": "2023-09-14T02:37:32.432519",
+     "duration": null,
+     "end_time": null,
      "exception": false,
-     "start_time": "2023-09-14T02:37:08.651331",
-     "status": "completed"
+     "start_time": "2023-09-14T04:09:45.618141",
+     "status": "running"
     },
     "tags": []
    },
@@ -219,12 +217,19 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2023-09-14 02:37:08--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n",
-      "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.87, ...\r\n",
-      "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n",
-      "HTTP request sent, awaiting response... 302 Found\r\n",
-      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694918228&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxODIyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=cJAoUY5y0W5uDSWebon3c0434JSN%7EgNHU8QPvHA1bl1fW7kXK0ETDur-X-85BKCXlTHdhzjGRWoxRQGb33uDdG35IvOOksMyaVFYfnyj0JA66Bh9q%7E35mFanEks9Ja7QfTFOyrfWlndyFOT0M5Hzx-rJQ-nLDBne1LfEZEwxt7Uv2jsFCYkukWDP1f-OwfqwTb1q4Ys7knlGyj1ZQ4sq45v6cFcJAXU8R8GUhEd5j8vg9bnxtYKZvYqJuZcX8T1w%7EQJ5DJK0l9lYIY0JIiqZr4tCNkjD6PbTvnVA7E8TQys0Hjgf0o291i9ruANc6bwjWcGOpPeBo4QI24aWO9Fxlg__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
-      "--2023-09-14 02:37:08--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694918228&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxODIyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=cJAoUY5y0W5uDSWebon3c0434JSN%7EgNHU8QPvHA1bl1fW7kXK0ETDur-X-85BKCXlTHdhzjGRWoxRQGb33uDdG35IvOOksMyaVFYfnyj0JA66Bh9q%7E35mFanEks9Ja7QfTFOyrfWlndyFOT0M5Hzx-rJQ-nLDBne1LfEZEwxt7Uv2jsFCYkukWDP1f-OwfqwTb1q4Ys7knlGyj1ZQ4sq45v6cFcJAXU8R8GUhEd5j8vg9bnxtYKZvYqJuZcX8T1w%7EQJ5DJK0l9lYIY0JIiqZr4tCNkjD6PbTvnVA7E8TQys0Hjgf0o291i9ruANc6bwjWcGOpPeBo4QI24aWO9Fxlg__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
+      "--2023-09-14 04:09:45--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n",
+      "Resolving huggingface.co (huggingface.co)... 13.33.33.20, 13.33.33.55, 13.33.33.110, ...\r\n",
+      "Connecting to huggingface.co (huggingface.co)|13.33.33.20|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "302 Found\r\n",
+      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694923785&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkyMzc4NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AzuHbkYMQgD0pHP1CFX1tdK-cjGb4Y8LvRXLbAmb9kT0d-Ldc2HT5Sr%7EokrLW-0-EumdDAJHoXxv-2KjYIVh702%7EN2hmQ5TxXxSZinQa%7EBXn9iUEVcmUvmqCtQ6b-4WiKBLmLPPzodpRs7hB0oXKrMI0rYgAhw6ue1xCaRlraEb75ZWUio0oP122zwSYVC3pXuhIsFkUUI0mONTWh5r4weCBpAXNgRFgKkR4cSm9yXJb6519Fy-nCqbXrmKDJdami5QOa--SPZ3bBwK7MJdZnMe2Ekl1DKwKEzbIHRiJ1vmap2oVTW3Yj2LjB8qTWbmUSNgc8DLKevXf2GyW9qhXHQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
+      "--2023-09-14 04:09:46--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694923785&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkyMzc4NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AzuHbkYMQgD0pHP1CFX1tdK-cjGb4Y8LvRXLbAmb9kT0d-Ldc2HT5Sr%7EokrLW-0-EumdDAJHoXxv-2KjYIVh702%7EN2hmQ5TxXxSZinQa%7EBXn9iUEVcmUvmqCtQ6b-4WiKBLmLPPzodpRs7hB0oXKrMI0rYgAhw6ue1xCaRlraEb75ZWUio0oP122zwSYVC3pXuhIsFkUUI0mONTWh5r4weCBpAXNgRFgKkR4cSm9yXJb6519Fy-nCqbXrmKDJdami5QOa--SPZ3bBwK7MJdZnMe2Ekl1DKwKEzbIHRiJ1vmap2oVTW3Yj2LjB8qTWbmUSNgc8DLKevXf2GyW9qhXHQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
       "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... "
      ]
     },
@@ -232,8 +237,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "108.138.64.49, 108.138.64.121, 108.138.64.111, ...\r\n",
-      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.138.64.49|:443... connected.\r\n",
+      "18.155.68.73, 18.155.68.98, 18.155.68.94, ...\r\n",
+      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.73|:443... connected.\r\n",
       "HTTP request sent, awaiting response... "
      ]
     },
@@ -254,7 +259,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "         v5r3-L6-D2   1%[                    ]  14.74M  63.3MB/s               "
+      "         v5r3-L6-D2   0%[                    ]  26.24K   111KB/s               "
      ]
     },
     {
@@ -262,7 +267,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "        v5r3-L6-D20   2%[                    ]  24.19M  47.2MB/s               "
+      "        v5r3-L6-D20   0%[                    ]  60.24K   128KB/s               "
      ]
     },
     {
@@ -270,7 +275,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "       v5r3-L6-D204   3%[                    ]  30.52M  38.2MB/s               "
+      "       v5r3-L6-D204   0%[                    ] 111.24K   157KB/s               "
      ]
     },
     {
@@ -278,7 +283,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "      v5r3-L6-D2048   4%[                    ]  45.26M  42.8MB/s               "
+      "      v5r3-L6-D2048   0%[                    ] 170.49K   181KB/s               "
      ]
     },
     {
@@ -286,7 +291,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "     v5r3-L6-D2048-   5%[>                   ]  54.89M  43.6MB/s               "
+      "     v5r3-L6-D2048-   0%[                    ] 217.59K   185KB/s               "
      ]
     },
     {
@@ -294,7 +299,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "    v5r3-L6-D2048-E   6%[>                   ]  61.03M  41.1MB/s               "
+      "    v5r3-L6-D2048-E   0%[                    ] 270.33K   191KB/s               "
      ]
     },
     {
@@ -302,7 +307,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "   v5r3-L6-D2048-E0   7%[>                   ]  75.78M  43.5MB/s               "
+      "   v5r3-L6-D2048-E0   0%[                    ] 323.64K   200KB/s               "
      ]
     },
     {
@@ -310,7 +315,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "  v5r3-L6-D2048-E0_   8%[>                   ]  87.34M  44.9MB/s               "
+      "  v5r3-L6-D2048-E0_   0%[                    ] 385.17K   204KB/s               "
      ]
     },
     {
@@ -318,7 +323,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      " v5r3-L6-D2048-E0_1   9%[>                   ]  91.55M  41.4MB/s               "
+      " v5r3-L6-D2048-E0_1   0%[                    ] 455.48K   215KB/s               "
      ]
     },
     {
@@ -326,7 +331,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "v5r3-L6-D2048-E0_1-  10%[=>                  ] 106.29M  43.3MB/s               "
+      "v5r3-L6-D2048-E0_1-   0%[                    ] 534.00K   226KB/s               "
      ]
     },
     {
@@ -334,7 +339,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "5r3-L6-D2048-E0_1-m  11%[=>                  ] 114.75M  43.2MB/s               "
+      "5r3-L6-D2048-E0_1-m   0%[                    ] 595.52K   230KB/s               "
      ]
     },
     {
@@ -342,7 +347,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "r3-L6-D2048-E0_1-me  11%[=>                  ] 121.56M  42.5MB/s               "
+      "r3-L6-D2048-E0_1-me   0%[                    ] 680.84K   241KB/s               "
      ]
     },
     {
@@ -350,7 +355,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "3-L6-D2048-E0_1-mem  12%[=>                  ] 122.07M  39.8MB/s    eta 22s    "
+      "3-L6-D2048-E0_1-mem   0%[                    ] 763.09K   249KB/s    eta 69m 41s"
      ]
     },
     {
@@ -358,7 +363,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-L6-D2048-E0_1-mem-  13%[=>                  ] 137.33M  42.0MB/s    eta 22s    "
+      "-L6-D2048-E0_1-mem-   0%[                    ] 841.62K   255KB/s    eta 69m 41s"
      ]
     },
     {
@@ -366,7 +371,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "L6-D2048-E0_1-mem-c  14%[=>                  ] 152.07M  43.6MB/s    eta 22s    "
+      "L6-D2048-E0_1-mem-c   0%[                    ] 920.72K   260KB/s    eta 69m 41s"
      ]
     },
     {
@@ -374,7 +379,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "6-D2048-E0_1-mem-ct  15%[==>                 ] 152.59M  41.2MB/s    eta 22s    "
+      "6-D2048-E0_1-mem-ct   0%[                    ]   1018K   270KB/s    eta 69m 41s"
      ]
     },
     {
@@ -382,7 +387,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-D2048-E0_1-mem-ctx  16%[==>                 ] 167.33M  41.1MB/s    eta 21s    "
+      "-D2048-E0_1-mem-ctx   0%[                    ]   1.08M   276KB/s    eta 69m 41s"
      ]
     },
     {
@@ -390,7 +395,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "D2048-E0_1-mem-ctx-  16%[==>                 ] 167.85M  37.9MB/s    eta 21s    "
+      "D2048-E0_1-mem-ctx-   0%[                    ]   1.18M   285KB/s    eta 60m 45s"
      ]
     },
     {
@@ -398,7 +403,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "2048-E0_1-mem-ctx-5  17%[==>                 ] 181.27M  39.7MB/s    eta 21s    "
+      "2048-E0_1-mem-ctx-5   0%[                    ]   1.28M   293KB/s    eta 60m 45s"
      ]
     },
     {
@@ -406,7 +411,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "048-E0_1-mem-ctx-51  18%[==>                 ] 183.10M  37.2MB/s    eta 21s    "
+      "048-E0_1-mem-ctx-51   0%[                    ]   1.40M   304KB/s    eta 60m 45s"
      ]
     },
     {
@@ -414,7 +419,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "48-E0_1-mem-ctx-512  19%[==>                 ] 198.36M  38.0MB/s    eta 21s    "
+      "48-E0_1-mem-ctx-512   0%[                    ]   1.51M   323KB/s    eta 60m 45s"
      ]
     },
     {
@@ -422,7 +427,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "8-E0_1-mem-ctx-512.  20%[===>                ] 213.11M  40.5MB/s    eta 21s    "
+      "8-E0_1-mem-ctx-512.   0%[                    ]   1.63M   342KB/s    eta 60m 45s"
      ]
     },
     {
@@ -430,7 +435,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-E0_1-mem-ctx-512.p  22%[===>                ] 228.36M  41.5MB/s    eta 21s    "
+      "-E0_1-mem-ctx-512.p   0%[                    ]   1.76M   359KB/s    eta 52m 1s "
      ]
     },
     {
@@ -438,7 +443,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "E0_1-mem-ctx-512.pt  23%[===>                ] 243.09M  42.1MB/s    eta 21s    "
+      "E0_1-mem-ctx-512.pt   0%[                    ]   1.90M   376KB/s    eta 52m 1s "
      ]
     },
     {
@@ -446,7 +451,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "0_1-mem-ctx-512.pth  24%[===>                ] 254.13M  44.3MB/s    eta 21s    "
+      "0_1-mem-ctx-512.pth   0%[                    ]   2.03M   395KB/s    eta 52m 1s "
      ]
     },
     {
@@ -454,7 +459,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "_1-mem-ctx-512.pth   25%[====>               ] 259.40M  41.5MB/s    eta 18s    "
+      "_1-mem-ctx-512.pth    0%[                    ]   2.18M   416KB/s    eta 52m 1s "
      ]
     },
     {
@@ -462,7 +467,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1-mem-ctx-512.pth    27%[====>               ] 274.66M  42.2MB/s    eta 18s    "
+      "1-mem-ctx-512.pth     0%[                    ]   2.33M   434KB/s    eta 52m 1s "
      ]
     },
     {
@@ -470,7 +475,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-mem-ctx-512.pth     28%[====>               ] 289.40M  44.8MB/s    eta 18s    "
+      "-mem-ctx-512.pth      0%[                    ]   2.51M   464KB/s    eta 44m 28s"
      ]
     },
     {
@@ -478,7 +483,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "mem-ctx-512.pth      29%[====>               ] 298.47M  43.8MB/s    eta 18s    "
+      "mem-ctx-512.pth       0%[                    ]   2.69M   488KB/s    eta 44m 28s"
      ]
     },
     {
@@ -486,7 +491,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "em-ctx-512.pth       30%[=====>              ] 305.18M  42.3MB/s    eta 17s    "
+      "em-ctx-512.pth        0%[                    ]   2.89M   515KB/s    eta 44m 28s"
      ]
     },
     {
@@ -494,7 +499,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "m-ctx-512.pth        30%[=====>              ] 313.62M  39.8MB/s    eta 17s    "
+      "m-ctx-512.pth         0%[                    ]   3.12M   552KB/s    eta 44m 28s"
      ]
     },
     {
@@ -502,7 +507,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-ctx-512.pth         32%[=====>              ] 333.86M  42.5MB/s    eta 17s    "
+      "-ctx-512.pth          0%[                    ]   3.36M   585KB/s    eta 44m 28s"
      ]
     },
     {
@@ -510,7 +515,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "ctx-512.pth          33%[=====>              ] 341.22M  44.0MB/s    eta 17s    "
+      "ctx-512.pth           0%[                    ]   3.62M   625KB/s    eta 36m 17s"
      ]
     },
     {
@@ -518,7 +523,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "tx-512.pth           34%[=====>              ] 350.95M  45.4MB/s    eta 16s    "
+      "tx-512.pth            0%[                    ]   3.91M   670KB/s    eta 36m 17s"
      ]
     },
     {
@@ -526,7 +531,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "x-512.pth            35%[======>             ] 365.70M  44.4MB/s    eta 16s    "
+      "x-512.pth             0%[                    ]   4.22M   721KB/s    eta 36m 17s"
      ]
     },
     {
@@ -534,7 +539,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-512.pth             36%[======>             ] 371.37M  45.7MB/s    eta 16s    "
+      "-512.pth              0%[                    ]   4.54M   771KB/s    eta 36m 17s"
      ]
     },
     {
@@ -542,7 +547,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "512.pth              37%[======>             ] 381.47M  44.0MB/s    eta 16s    "
+      "512.pth               0%[                    ]   4.90M   830KB/s    eta 36m 17s"
      ]
     },
     {
@@ -550,7 +555,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "12.pth               39%[======>             ] 396.73M  45.9MB/s    eta 15s    "
+      "12.pth                0%[                    ]   5.28M   890KB/s    eta 28m 36s"
      ]
     },
     {
@@ -558,7 +563,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "2.pth                41%[=======>            ] 419.01M  47.8MB/s    eta 15s    "
+      "2.pth                 0%[                    ]   5.70M   959KB/s    eta 28m 36s"
      ]
     },
     {
@@ -566,7 +571,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      ".pth                 42%[=======>            ] 427.25M  47.9MB/s    eta 15s    "
+      ".pth                  0%[                    ]   6.13M  1.00MB/s    eta 28m 36s"
      ]
     },
     {
@@ -574,7 +579,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "pth                  43%[=======>            ] 442.51M  47.9MB/s    eta 15s    "
+      "pth                   0%[                    ]   6.61M  1.08MB/s    eta 28m 36s"
      ]
     },
     {
@@ -582,7 +587,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "th                   45%[========>           ] 457.76M  46.8MB/s    eta 15s    "
+      "th                    0%[                    ]   7.11M  1.16MB/s    eta 28m 36s"
      ]
     },
     {
@@ -590,7 +595,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "h                    46%[========>           ] 473.02M  47.8MB/s    eta 12s    "
+      "h                     0%[                    ]   7.66M  1.25MB/s    eta 22m 17s"
      ]
     },
     {
@@ -598,7 +603,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                     48%[========>           ] 488.28M  47.6MB/s    eta 12s    "
+      "                      0%[                    ]   8.23M  1.34MB/s    eta 22m 17s"
      ]
     },
     {
@@ -606,7 +611,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                  v  49%[========>           ] 507.63M  50.4MB/s    eta 12s    "
+      "                  v   0%[                    ]   8.84M  1.44MB/s    eta 22m 17s"
      ]
     },
     {
@@ -614,7 +619,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                 v5  50%[=========>          ] 512.48M  51.1MB/s    eta 12s    "
+      "                 v5   0%[                    ]   9.51M  1.56MB/s    eta 22m 17s"
      ]
     },
     {
@@ -622,7 +627,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                v5r  52%[=========>          ] 532.75M  54.5MB/s    eta 12s    "
+      "                v5r   1%[                    ]  10.22M  1.67MB/s    eta 22m 17s"
      ]
     },
     {
@@ -630,7 +635,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "               v5r3  52%[=========>          ] 534.05M  51.4MB/s    eta 11s    "
+      "               v5r3   1%[                    ]  10.97M  1.79MB/s    eta 17m 18s"
      ]
     },
     {
@@ -638,7 +643,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "              v5r3-  53%[=========>          ] 547.49M  52.6MB/s    eta 11s    "
+      "              v5r3-   1%[                    ]  11.76M  1.92MB/s    eta 17m 18s"
      ]
     },
     {
@@ -646,7 +651,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "             v5r3-L  54%[=========>          ] 557.13M  52.7MB/s    eta 11s    "
+      "             v5r3-L   1%[                    ]  12.61M  2.06MB/s    eta 17m 18s"
      ]
     },
     {
@@ -654,7 +659,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "            v5r3-L6  56%[==========>         ] 569.63M  54.0MB/s    eta 11s    "
+      "            v5r3-L6   1%[                    ]  13.50M  2.20MB/s    eta 17m 18s"
      ]
     },
     {
@@ -662,7 +667,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "           v5r3-L6-  56%[==========>         ] 579.31M  55.5MB/s    eta 11s    "
+      "           v5r3-L6-   1%[                    ]  14.45M  2.35MB/s    eta 17m 18s"
      ]
     },
     {
@@ -670,7 +675,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "          v5r3-L6-D  58%[==========>         ] 594.57M  54.4MB/s    eta 9s     "
+      "          v5r3-L6-D   1%[                    ]  15.45M  2.51MB/s    eta 13m 30s"
      ]
     },
     {
@@ -678,7 +683,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "         v5r3-L6-D2  59%[==========>         ] 600.21M  54.9MB/s    eta 9s     "
+      "         v5r3-L6-D2   1%[                    ]  16.51M  2.67MB/s    eta 13m 30s"
      ]
     },
     {
@@ -686,7 +691,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "        v5r3-L6-D20  59%[==========>         ] 602.76M  51.4MB/s    eta 9s     "
+      "        v5r3-L6-D20   1%[                    ]  17.62M  2.84MB/s    eta 13m 30s"
      ]
     },
     {
@@ -694,7 +699,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "       v5r3-L6-D204  60%[===========>        ] 610.35M  48.1MB/s    eta 9s     "
+      "       v5r3-L6-D204   1%[                    ]  18.81M  3.02MB/s    eta 13m 30s"
      ]
     },
     {
@@ -702,7 +707,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "      v5r3-L6-D2048  61%[===========>        ] 625.09M  48.0MB/s    eta 9s     "
+      "      v5r3-L6-D2048   1%[                    ]  20.06M  3.21MB/s    eta 13m 30s"
      ]
     },
     {
@@ -710,7 +715,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "     v5r3-L6-D2048-  62%[===========>        ] 640.36M  48.3MB/s    eta 8s     "
+      "     v5r3-L6-D2048-   2%[                    ]  21.37M  3.41MB/s    eta 10m 37s"
      ]
     },
     {
@@ -718,7 +723,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "    v5r3-L6-D2048-E  64%[===========>        ] 655.62M  47.5MB/s    eta 8s     "
+      "    v5r3-L6-D2048-E   2%[                    ]  22.76M  3.62MB/s    eta 10m 37s"
      ]
     },
     {
@@ -726,7 +731,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "   v5r3-L6-D2048-E0  64%[===========>        ] 656.25M  43.3MB/s    eta 8s     "
+      "   v5r3-L6-D2048-E0   2%[                    ]  24.22M  3.93MB/s    eta 10m 37s"
      ]
     },
     {
@@ -734,7 +739,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "  v5r3-L6-D2048-E0_  65%[============>       ] 669.55M  42.5MB/s    eta 8s     "
+      "  v5r3-L6-D2048-E0_   2%[                    ]  25.75M  4.16MB/s    eta 10m 37s"
      ]
     },
     {
@@ -742,7 +747,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      " v5r3-L6-D2048-E0_1  66%[============>       ] 678.94M  43.2MB/s    eta 8s     "
+      " v5r3-L6-D2048-E0_1   2%[                    ]  27.36M  4.40MB/s    eta 10m 37s"
      ]
     },
     {
@@ -750,7 +755,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "v5r3-L6-D2048-E0_1-  67%[============>       ] 686.64M  42.0MB/s    eta 7s     "
+      "v5r3-L6-D2048-E0_1-   2%[                    ]  29.04M  4.76MB/s    eta 8m 25s "
      ]
     },
     {
@@ -758,7 +763,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "5r3-L6-D2048-E0_1-m  69%[============>       ] 701.90M  42.1MB/s    eta 7s     "
+      "5r3-L6-D2048-E0_1-m   2%[                    ]  30.06M  4.83MB/s    eta 8m 25s "
      ]
     },
     {
@@ -766,7 +771,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "r3-L6-D2048-E0_1-me  70%[=============>      ] 716.64M  42.5MB/s    eta 7s     "
+      "r3-L6-D2048-E0_1-me   3%[                    ]  32.53M  5.26MB/s    eta 8m 25s "
      ]
     },
     {
@@ -774,7 +779,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "3-L6-D2048-E0_1-mem  70%[=============>      ] 717.16M  41.4MB/s    eta 7s     "
+      "3-L6-D2048-E0_1-mem   3%[                    ]  33.84M  5.39MB/s    eta 8m 25s "
      ]
     },
     {
@@ -782,7 +787,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-L6-D2048-E0_1-mem-  71%[=============>      ] 724.85M  39.7MB/s    eta 7s     "
+      "-L6-D2048-E0_1-mem-   3%[                    ]  35.19M  5.53MB/s    eta 8m 25s "
      ]
     },
     {
@@ -790,7 +795,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "L6-D2048-E0_1-mem-c  72%[=============>      ] 732.42M  38.1MB/s    eta 7s     "
+      "L6-D2048-E0_1-mem-c   3%[                    ]  36.58M  5.76MB/s    eta 7m 10s "
      ]
     },
     {
@@ -798,7 +803,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "6-D2048-E0_1-mem-ct  73%[=============>      ] 747.69M  38.4MB/s    eta 7s     "
+      "6-D2048-E0_1-mem-ct   3%[                    ]  38.00M  5.88MB/s    eta 7m 10s "
      ]
     },
     {
@@ -806,7 +811,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-D2048-E0_1-mem-ctx  75%[==============>     ] 762.94M  40.7MB/s    eta 7s     "
+      "-D2048-E0_1-mem-ctx   3%[                    ]  39.45M  5.99MB/s    eta 7m 10s "
      ]
     },
     {
@@ -814,7 +819,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "D2048-E0_1-mem-ctx-  76%[==============>     ] 777.68M  43.3MB/s    eta 7s     "
+      "D2048-E0_1-mem-ctx-   4%[                    ]  40.94M  6.09MB/s    eta 7m 10s "
      ]
     },
     {
@@ -822,7 +827,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "2048-E0_1-mem-ctx-5  77%[==============>     ] 792.94M  42.8MB/s    eta 5s     "
+      "2048-E0_1-mem-ctx-5   4%[                    ]  42.45M  6.19MB/s    eta 7m 10s "
      ]
     },
     {
@@ -830,7 +835,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "048-E0_1-mem-ctx-51  78%[==============>     ] 793.46M  39.6MB/s    eta 5s     "
+      "048-E0_1-mem-ctx-51   4%[                    ]  44.00M  6.35MB/s    eta 6m 21s "
      ]
     },
     {
@@ -838,7 +843,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "48-E0_1-mem-ctx-512  79%[==============>     ] 808.20M  41.4MB/s    eta 5s     "
+      "48-E0_1-mem-ctx-512   4%[                    ]  45.56M  6.27MB/s    eta 6m 21s "
      ]
     },
     {
@@ -846,7 +851,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "8-E0_1-mem-ctx-512.  79%[==============>     ] 812.25M  39.4MB/s    eta 5s     "
+      "8-E0_1-mem-ctx-512.   4%[                    ]  47.15M  6.42MB/s    eta 6m 21s "
      ]
     },
     {
@@ -854,7 +859,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-E0_1-mem-ctx-512.p  81%[===============>    ] 823.97M  40.9MB/s    eta 5s     "
+      "-E0_1-mem-ctx-512.p   4%[                    ]  48.76M  6.51MB/s    eta 6m 21s "
      ]
     },
     {
@@ -862,7 +867,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "E0_1-mem-ctx-512.pt  82%[===============>    ] 838.71M  42.4MB/s    eta 4s     "
+      "E0_1-mem-ctx-512.pt   4%[                    ]  50.33M  6.29MB/s    eta 5m 51s "
      ]
     },
     {
@@ -870,7 +875,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "0_1-mem-ctx-512.pth  82%[===============>    ] 839.35M  41.6MB/s    eta 4s     "
+      "0_1-mem-ctx-512.pth   5%[>                   ]  53.14M  6.47MB/s    eta 5m 51s "
      ]
     },
     {
@@ -878,7 +883,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "_1-mem-ctx-512.pth   85%[================>   ] 866.03M  44.5MB/s    eta 4s     "
+      "_1-mem-ctx-512.pth    5%[>                   ]  54.33M  6.25MB/s    eta 5m 51s "
      ]
     },
     {
@@ -886,7 +891,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1-mem-ctx-512.pth    85%[================>   ] 869.75M  42.1MB/s    eta 4s     "
+      "1-mem-ctx-512.pth     5%[>                   ]  55.53M  6.40MB/s    eta 5m 51s "
      ]
     },
     {
@@ -894,7 +899,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-mem-ctx-512.pth     87%[================>   ] 885.01M  42.8MB/s    eta 4s     "
+      "-mem-ctx-512.pth      5%[>                   ]  56.75M  6.47MB/s    eta 5m 51s "
      ]
     },
     {
@@ -902,7 +907,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "mem-ctx-512.pth      87%[================>   ] 893.44M  44.4MB/s    eta 3s     "
+      "mem-ctx-512.pth       5%[>                   ]  57.98M  6.05MB/s    eta 5m 22s "
      ]
     },
     {
@@ -910,7 +915,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "em-ctx-512.pth       88%[================>   ] 900.27M  43.8MB/s    eta 3s     "
+      "em-ctx-512.pth        5%[>                   ]  59.25M  5.96MB/s    eta 5m 22s "
      ]
     },
     {
@@ -918,7 +923,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "m-ctx-512.pth        89%[================>   ] 911.78M  42.6MB/s    eta 3s     "
+      "m-ctx-512.pth         5%[>                   ]  60.51M  6.06MB/s    eta 5m 22s "
      ]
     },
     {
@@ -926,7 +931,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-ctx-512.pth         90%[=================>  ] 915.53M  41.5MB/s    eta 3s     "
+      "-ctx-512.pth          6%[>                   ]  61.79M  6.09MB/s    eta 5m 22s "
      ]
     },
     {
@@ -934,7 +939,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "ctx-512.pth          91%[=================>  ] 930.78M  40.9MB/s    eta 2s     "
+      "ctx-512.pth           6%[>                   ]  63.09M  6.06MB/s    eta 5m 22s "
      ]
     },
     {
@@ -942,7 +947,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "tx-512.pth           92%[=================>  ] 944.21M  41.4MB/s    eta 2s     "
+      "tx-512.pth            6%[>                   ]  64.40M  5.88MB/s    eta 5m 5s  "
      ]
     },
     {
@@ -950,7 +955,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "x-512.pth            94%[=================>  ] 956.92M  42.2MB/s    eta 2s     "
+      "x-512.pth             6%[>                   ]  65.73M  5.76MB/s    eta 5m 5s  "
      ]
     },
     {
@@ -958,7 +963,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-512.pth             94%[=================>  ] 961.30M  42.7MB/s    eta 2s     "
+      "-512.pth              6%[>                   ]  67.06M  5.90MB/s    eta 5m 5s  "
      ]
     },
     {
@@ -966,7 +971,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "512.pth              96%[==================> ] 980.99M  44.8MB/s    eta 1s     "
+      "512.pth               6%[>                   ]  68.40M  5.89MB/s    eta 5m 5s  "
      ]
     },
     {
@@ -974,7 +979,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "12.pth               97%[==================> ] 986.93M  42.2MB/s    eta 1s     "
+      "12.pth                6%[>                   ]  69.76M  5.57MB/s    eta 5m 5s  "
      ]
     },
     {
@@ -982,7 +987,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "2.pth                98%[==================> ]   1004M  44.5MB/s    eta 1s     "
+      "2.pth                 6%[>                   ]  71.12M  5.78MB/s    eta 4m 50s "
      ]
     },
     {
@@ -990,7 +995,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      ".pth                 99%[==================> ]   1007M  42.6MB/s    eta 1s     "
+      ".pth                  7%[>                   ]  72.48M  5.87MB/s    eta 4m 50s "
      ]
     },
     {
@@ -998,3468 +1003,1334 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "v5r3-L6-D2048-E0_1- 100%[===================>]   1017M  45.2MB/s    in 23s     \r\n",
-      "\r\n",
-      "2023-09-14 02:37:32 (44.1 MB/s) - ‘v5r3-L6-D2048-E0_1-mem-ctx-512.pth’ saved [1066537217/1066537217]\r\n",
-      "\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "total 1018M\r\n",
-      "drwxr-xr-x  2 root root  4.0K Sep 14 02:37 .\r\n",
-      "drwxr-xr-x 20 root root  4.0K Sep 14 02:37 ..\r\n",
-      "-rw-r--r--  1 root root 1018M Sep 13 20:28 v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Download the model directly (stop gap till HF sync issues is resolved)\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n",
-    "\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    ls -alh ."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5700b4b5",
-   "metadata": {
-    "papermill": {
-     "duration": 0.008064,
-     "end_time": "2023-09-14T02:37:32.449084",
-     "exception": false,
-     "start_time": "2023-09-14T02:37:32.441020",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "source": [
-    "## Tune 3 : Ramping up the ctx size (8192), memory training\n",
-    "\n",
-    "- Tune 3: Large ctx size (8192), Scaling up!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "d5f911bc",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-14T02:37:32.468291Z",
-     "iopub.status.busy": "2023-09-14T02:37:32.467681Z",
-     "iopub.status.idle": "2023-09-14T02:37:52.531727Z",
-     "shell.execute_reply": "2023-09-14T02:37:52.530635Z"
-    },
-    "papermill": {
-     "duration": 20.125122,
-     "end_time": "2023-09-14T02:37:52.582572",
-     "exception": false,
-     "start_time": "2023-09-14T02:37:32.457450",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "## Generating word reptition dataset ##\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n"
+      "pth                   7%[>                   ]  73.86M  5.49MB/s    eta 4m 50s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n"
+      "\r",
+      "th                    7%[>                   ]  75.23M  5.67MB/s    eta 4m 50s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n"
+      "\r",
+      "h                     7%[>                   ]  76.62M  5.72MB/s    eta 4m 50s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n"
+      "\r",
+      "                      7%[>                   ]  78.01M  5.64MB/s    eta 4m 37s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n"
+      "\r",
+      "                  v   7%[>                   ]  79.40M  5.62MB/s    eta 4m 37s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n"
+      "\r",
+      "                 v5   7%[>                   ]  80.79M  5.81MB/s    eta 4m 37s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n"
+      "\r",
+      "                v5r   8%[>                   ]  82.19M  5.88MB/s    eta 4m 37s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n"
+      "\r",
+      "               v5r3   8%[>                   ]  83.59M  5.80MB/s    eta 4m 37s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 374 samples (10 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+      "\r",
+      "              v5r3-   8%[>                   ]  84.98M  5.81MB/s    eta 4m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 869 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+      "\r",
+      "             v5r3-L   8%[>                   ]  86.39M  5.72MB/s    eta 4m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n"
+      "\r",
+      "            v5r3-L6   8%[>                   ]  87.79M  6.01MB/s    eta 4m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 528 samples (10 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-   8%[>                   ]  89.20M  5.89MB/s    eta 4m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 586 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D   8%[>                   ]  90.61M  5.99MB/s    eta 4m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 1060 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2   9%[>                   ]  92.01M  5.98MB/s    eta 4m 15s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 747 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D20   9%[>                   ]  93.42M  5.91MB/s    eta 4m 15s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D204   9%[>                   ]  94.83M  5.80MB/s    eta 4m 15s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 1301 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2048   9%[>                   ]  96.22M  6.05MB/s    eta 4m 15s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 100 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2048-   9%[>                   ]  97.62M  5.93MB/s    eta 4m 15s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2048-E   9%[>                   ]  99.03M  5.88MB/s    eta 4m 6s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 650 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2048-E0   9%[>                   ] 100.44M  6.01MB/s    eta 4m 6s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2048-E0_  10%[=>                  ] 101.84M  5.95MB/s    eta 4m 6s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2048-E0_1  10%[=>                  ] 103.25M  5.89MB/s    eta 4m 6s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 81 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2048-E0_1-  10%[=>                  ] 104.65M  6.02MB/s    eta 4m 6s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2048-E0_1-m  10%[=>                  ] 106.06M  5.95MB/s    eta 3m 58s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 1794 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+      "\r",
+      "r3-L6-D2048-E0_1-me  10%[=>                  ] 107.47M  5.96MB/s    eta 3m 58s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 529 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+      "\r",
+      "3-L6-D2048-E0_1-mem  10%[=>                  ] 108.87M  6.08MB/s    eta 3m 58s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n"
+      "\r",
+      "-L6-D2048-E0_1-mem-  10%[=>                  ] 110.28M  5.96MB/s    eta 3m 58s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n"
+      "\r",
+      "L6-D2048-E0_1-mem-c  10%[=>                  ] 111.70M  5.85MB/s    eta 3m 58s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 328 samples (10 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+      "\r",
+      "6-D2048-E0_1-mem-ct  11%[=>                  ] 113.11M  6.08MB/s    eta 3m 51s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 352 samples (10 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+      "\r",
+      "-D2048-E0_1-mem-ctx  11%[=>                  ] 114.53M  5.85MB/s    eta 3m 51s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n"
+      "\r",
+      "D2048-E0_1-mem-ctx-  11%[=>                  ] 115.95M  6.10MB/s    eta 3m 51s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 293 samples (10 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+      "\r",
+      "2048-E0_1-mem-ctx-5  11%[=>                  ] 117.39M  6.12MB/s    eta 3m 51s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+      "\r",
+      "048-E0_1-mem-ctx-51  11%[=>                  ] 118.81M  6.13MB/s    eta 3m 51s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+      "\r",
+      "48-E0_1-mem-ctx-512  11%[=>                  ] 120.25M  6.01MB/s    eta 3m 44s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 26 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+      "\r",
+      "8-E0_1-mem-ctx-512.  11%[=>                  ] 121.69M  5.94MB/s    eta 3m 44s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  12%[=>                  ] 123.14M  6.17MB/s    eta 3m 44s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  12%[=>                  ] 124.59M  6.04MB/s    eta 3m 44s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 313 samples (10 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  12%[=>                  ] 126.06M  5.99MB/s    eta 3m 44s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 264 samples (10 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   12%[=>                  ] 127.54M  6.22MB/s    eta 3m 38s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    12%[=>                  ] 129.03M  6.11MB/s    eta 3m 38s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 184 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     12%[=>                  ] 130.53M  6.12MB/s    eta 3m 38s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      12%[=>                  ] 132.04M  6.03MB/s    eta 3m 38s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       13%[=>                  ] 133.56M  6.34MB/s    eta 3m 38s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 39 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        13%[=>                  ] 135.11M  6.23MB/s    eta 3m 32s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 439 samples (10 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         13%[=>                  ] 136.67M  6.14MB/s    eta 3m 32s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          13%[=>                  ] 138.25M  6.44MB/s    eta 3m 32s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+      "\r",
+      "tx-512.pth           13%[=>                  ] 139.84M  6.35MB/s    eta 3m 32s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+      "\r",
+      "x-512.pth            13%[=>                  ] 141.45M  6.54MB/s    eta 3m 32s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n"
+      "\r",
+      "-512.pth             14%[=>                  ] 143.09M  6.46MB/s    eta 3m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 401 samples (10 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+      "\r",
+      "512.pth              14%[=>                  ] 144.73M  6.38MB/s    eta 3m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+      "\r",
+      "12.pth               14%[=>                  ] 146.42M  6.69MB/s    eta 3m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+      "\r",
+      "2.pth                14%[=>                  ] 148.12M  6.54MB/s    eta 3m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+      "\r",
+      ".pth                 14%[=>                  ] 149.86M  6.88MB/s    eta 3m 25s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+      "\r",
+      "pth                  14%[=>                  ] 151.62M  6.96MB/s    eta 3m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+      "\r",
+      "th                   15%[==>                 ] 153.42M  6.89MB/s    eta 3m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+      "\r",
+      "h                    15%[==>                 ] 155.23M  6.93MB/s    eta 3m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+      "\r",
+      "                     15%[==>                 ] 157.08M  7.30MB/s    eta 3m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 280 samples (10 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+      "\r",
+      "                  v  15%[==>                 ] 158.95M  7.33MB/s    eta 3m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+      "\r",
+      "                 v5  15%[==>                 ] 160.19M  7.17MB/s    eta 3m 12s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 2607 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+      "\r",
+      "                v5r  15%[==>                 ] 161.01M  7.14MB/s    eta 3m 12s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+      "\r",
+      "               v5r3  16%[==>                 ] 162.95M  7.14MB/s    eta 3m 12s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+      "\r",
+      "              v5r3-  16%[==>                 ] 164.95M  7.63MB/s    eta 3m 12s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+      "\r",
+      "             v5r3-L  16%[==>                 ] 167.01M  7.54MB/s    eta 3m 12s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  16%[==>                 ] 169.09M  7.62MB/s    eta 3m 5s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  16%[==>                 ] 171.23M  8.09MB/s    eta 3m 5s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  17%[==>                 ] 173.42M  7.87MB/s    eta 3m 5s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 21 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  17%[==>                 ] 175.65M  8.43MB/s    eta 3m 5s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D20  17%[==>                 ] 177.94M  8.48MB/s    eta 3m 5s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D204  17%[==>                 ] 180.28M  8.42MB/s    eta 2m 57s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2048  17%[==>                 ] 182.67M  8.93MB/s    eta 2m 57s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 482 samples (10 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2048-  18%[==>                 ] 185.12M  8.79MB/s    eta 2m 57s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2048-E  18%[==>                 ] 187.65M  9.09MB/s    eta 2m 57s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2048-E0  18%[==>                 ] 190.23M  9.09MB/s    eta 2m 57s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2048-E0_  18%[==>                 ] 192.89M  9.54MB/s    eta 2m 48s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 267 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2048-E0_1  19%[==>                 ] 195.59M  9.55MB/s    eta 2m 48s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2048-E0_1-  19%[==>                 ] 198.34M  10.2MB/s    eta 2m 48s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2048-E0_1-m  19%[==>                 ] 201.22M  10.1MB/s    eta 2m 48s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+      "\r",
+      "r3-L6-D2048-E0_1-me  20%[===>                ] 204.00M  10.3MB/s    eta 2m 48s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+      "\r",
+      "3-L6-D2048-E0_1-mem  20%[===>                ] 207.01M  10.3MB/s    eta 2m 39s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+      "\r",
+      "-L6-D2048-E0_1-mem-  20%[===>                ] 210.11M  11.0MB/s    eta 2m 39s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+      "\r",
+      "L6-D2048-E0_1-mem-c  20%[===>                ] 213.29M  11.0MB/s    eta 2m 39s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+      "\r",
+      "6-D2048-E0_1-mem-ct  21%[===>                ] 216.56M  11.1MB/s    eta 2m 39s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+      "\r",
+      "-D2048-E0_1-mem-ctx  21%[===>                ] 219.92M  11.8MB/s    eta 2m 39s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+      "\r",
+      "D2048-E0_1-mem-ctx-  21%[===>                ] 222.69M  11.6MB/s    eta 2m 29s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+      "\r",
+      "2048-E0_1-mem-ctx-5  22%[===>                ] 226.00M  11.6MB/s    eta 2m 29s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+      "\r",
+      "048-E0_1-mem-ctx-51  22%[===>                ] 229.44M  12.3MB/s    eta 2m 29s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+      "\r",
+      "48-E0_1-mem-ctx-512  22%[===>                ] 232.97M  12.3MB/s    eta 2m 29s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+      "\r",
+      "8-E0_1-mem-ctx-512.  23%[===>                ] 236.69M  12.3MB/s    eta 2m 29s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  23%[===>                ] 240.34M  13.0MB/s    eta 2m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  23%[===>                ] 244.11M  13.0MB/s    eta 2m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 117 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  24%[===>                ] 247.87M  13.1MB/s    eta 2m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   24%[===>                ] 251.87M  13.7MB/s    eta 2m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    25%[====>               ] 255.64M  13.7MB/s    eta 2m 19s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     25%[====>               ] 259.62M  13.7MB/s    eta 2m 9s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      25%[====>               ] 263.37M  14.3MB/s    eta 2m 9s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       26%[====>               ] 267.14M  14.2MB/s    eta 2m 9s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        26%[====>               ] 271.37M  14.3MB/s    eta 2m 9s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         27%[====>               ] 275.03M  14.2MB/s    eta 2m 2s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          27%[====>               ] 279.12M  14.4MB/s    eta 2m 2s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+      "\r",
+      "tx-512.pth           27%[====>               ] 282.20M  14.4MB/s    eta 2m 2s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+      "\r",
+      "x-512.pth            28%[====>               ] 285.34M  14.4MB/s    eta 2m 2s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 48 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+      "\r",
+      "-512.pth             28%[====>               ] 288.56M  14.4MB/s    eta 2m 2s  "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+      "\r",
+      "512.pth              28%[====>               ] 291.84M  14.4MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5563 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+      "\r",
+      "12.pth               29%[====>               ] 295.17M  14.5MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+      "\r",
+      "2.pth                29%[====>               ] 297.29M  14.0MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+      "\r",
+      ".pth                 29%[====>               ] 301.00M  14.1MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+      "\r",
+      "pth                  29%[====>               ] 303.42M  14.0MB/s    eta 1m 52s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+      "\r",
+      "th                   29%[====>               ] 303.50M  13.4MB/s    eta 1m 52s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+      "\r",
+      "h                    30%[=====>              ] 305.98M  13.1MB/s    eta 1m 52s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+      "\r",
+      "                     30%[=====>              ] 308.53M  12.9MB/s    eta 1m 52s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+      "\r",
+      "                  v  30%[=====>              ] 311.09M  12.7MB/s    eta 1m 52s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 140 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+      "\r",
+      "                 v5  30%[=====>              ] 313.67M  12.4MB/s    eta 1m 49s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 63 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+      "\r",
+      "                v5r  31%[=====>              ] 316.28M  12.2MB/s    eta 1m 49s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+      "\r",
+      "               v5r3  31%[=====>              ] 318.94M  11.9MB/s    eta 1m 49s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+      "\r",
+      "              v5r3-  31%[=====>              ] 321.59M  11.7MB/s    eta 1m 49s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+      "\r",
+      "             v5r3-L  31%[=====>              ] 324.28M  11.5MB/s    eta 1m 49s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  32%[=====>              ] 326.95M  11.2MB/s    eta 1m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  32%[=====>              ] 329.69M  11.2MB/s    eta 1m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 80 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  32%[=====>              ] 332.44M  11.2MB/s    eta 1m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 100 max words, 2000 samples - at ../dataset/gen-word-100-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  32%[=====>              ] 335.20M  10.9MB/s    eta 1m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 200 max words, 2000 samples - at ../dataset/gen-word-200-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D20  33%[=====>              ] 337.97M  10.8MB/s    eta 1m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 300 max words, 2000 samples - at ../dataset/gen-word-300-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D204  33%[=====>              ] 340.76M  10.7MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 700 max words, 2000 samples - at ../dataset/gen-word-700-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2048  33%[=====>              ] 343.59M  11.1MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 400 max words, 2000 samples - at ../dataset/gen-word-400-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2048-  34%[=====>              ] 346.44M  11.2MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 500 max words, 2000 samples - at ../dataset/gen-word-500-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2048-E  34%[=====>              ] 349.26M  11.3MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2048-E0  34%[=====>              ] 352.09M  11.4MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 600 max words, 2000 samples - at ../dataset/gen-word-600-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2048-E0_  34%[=====>              ] 354.86M  11.6MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2048-E0_1  35%[======>             ] 357.48M  11.6MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1000 max words, 2000 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2048-E0_1-  35%[======>             ] 360.34M  11.7MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2048-E0_1-m  35%[======>             ] 363.25M  11.8MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 800 max words, 2000 samples - at ../dataset/gen-word-800-count.jsonl\n"
+      "\r",
+      "r3-L6-D2048-E0_1-me  35%[======>             ] 366.14M  11.9MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 900 max words, 2000 samples - at ../dataset/gen-word-900-count.jsonl\n"
+      "\r",
+      "3-L6-D2048-E0_1-mem  36%[======>             ] 369.04M  11.9MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+      "\r",
+      "-L6-D2048-E0_1-mem-  36%[======>             ] 371.97M  12.0MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+      "\r",
+      "L6-D2048-E0_1-mem-c  36%[======>             ] 374.89M  11.9MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+      "\r",
+      "6-D2048-E0_1-mem-ct  37%[======>             ] 377.72M  11.4MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+      "\r",
+      "-D2048-E0_1-mem-ctx  37%[======>             ] 381.44M  11.8MB/s    eta 91s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+      "\r",
+      "D2048-E0_1-mem-ctx-  37%[======>             ] 383.50M  11.6MB/s    eta 91s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+      "\r",
+      "2048-E0_1-mem-ctx-5  37%[======>             ] 385.61M  11.3MB/s    eta 91s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+      "\r",
+      "048-E0_1-mem-ctx-51  38%[======>             ] 387.75M  11.4MB/s    eta 91s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+      "\r",
+      "48-E0_1-mem-ctx-512  38%[======>             ] 389.92M  11.3MB/s    eta 91s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+      "\r",
+      "8-E0_1-mem-ctx-512.  38%[======>             ] 392.11M  11.2MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  38%[======>             ] 394.33M  11.0MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  38%[======>             ] 396.56M  10.9MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  39%[======>             ] 398.83M  10.8MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   39%[======>             ] 401.11M  10.7MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    39%[======>             ] 403.42M  10.6MB/s    eta 86s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     39%[======>             ] 405.75M  10.4MB/s    eta 86s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      40%[=======>            ] 408.08M  10.2MB/s    eta 86s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       40%[=======>            ] 410.44M  10.1MB/s    eta 86s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        40%[=======>            ] 412.81M  9.85MB/s    eta 86s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         40%[=======>            ] 415.19M  9.71MB/s    eta 84s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          41%[=======>            ] 417.58M  9.94MB/s    eta 84s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+      "\r",
+      "tx-512.pth           41%[=======>            ] 419.97M  9.82MB/s    eta 84s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+      "\r",
+      "x-512.pth            41%[=======>            ] 422.39M  9.92MB/s    eta 84s    "
      ]
+    }
+   ],
+   "source": [
+    "# Download the model directly (stop-gap until the HF sync issue is resolved)\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n",
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    ls -alh ."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d02fd021",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
-     ]
+    "tags": []
+   },
+   "source": [
+    "## Tune 3 : Ramping up the ctx size (8192), memory training\n",
+    "\n",
+    "- Tune 3: Large ctx size (8192), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "535fef69",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "## Done ##\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "total 6.1G\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  21K Sep 14 02:37 gen-word-10-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 2.1M Sep 14 02:37 gen-word-100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  20M Sep 14 02:37 gen-word-1000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  22M Sep 14 02:37 gen-word-1100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  23M Sep 14 02:37 gen-word-1200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  25M Sep 14 02:37 gen-word-1300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27M Sep 14 02:37 gen-word-1400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  24K Sep 14 02:37 gen-word-15-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29M Sep 14 02:37 gen-word-1500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  31M Sep 14 02:37 gen-word-1600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  33M Sep 14 02:37 gen-word-1700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  35M Sep 14 02:37 gen-word-1800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  37M Sep 14 02:37 gen-word-1900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  30K Sep 14 02:37 gen-word-20-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 4.0M Sep 14 02:37 gen-word-200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  39M Sep 14 02:37 gen-word-2000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  41M Sep 14 02:37 gen-word-2100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  42M Sep 14 02:37 gen-word-2200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  44M Sep 14 02:37 gen-word-2300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  46M Sep 14 02:37 gen-word-2400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  35K Sep 14 02:37 gen-word-25-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  48M Sep 14 02:37 gen-word-2500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  50M Sep 14 02:37 gen-word-2600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  52M Sep 14 02:37 gen-word-2700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  54M Sep 14 02:37 gen-word-2800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  56M Sep 14 02:37 gen-word-2900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  39K Sep 14 02:37 gen-word-30-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 5.9M Sep 14 02:37 gen-word-300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  58M Sep 14 02:37 gen-word-3000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  60M Sep 14 02:37 gen-word-3100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  61M Sep 14 02:37 gen-word-3200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  63M Sep 14 02:37 gen-word-3300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  65M Sep 14 02:37 gen-word-3400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  45K Sep 14 02:37 gen-word-35-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  67M Sep 14 02:37 gen-word-3500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  69M Sep 14 02:37 gen-word-3600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  71M Sep 14 02:37 gen-word-3700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  73M Sep 14 02:37 gen-word-3800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  75M Sep 14 02:37 gen-word-3900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  49K Sep 14 02:37 gen-word-40-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 7.8M Sep 14 02:37 gen-word-400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  77M Sep 14 02:37 gen-word-4000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  79M Sep 14 02:37 gen-word-4100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  80M Sep 14 02:37 gen-word-4200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  82M Sep 14 02:37 gen-word-4300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  84M Sep 14 02:37 gen-word-4400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  54K Sep 14 02:37 gen-word-45-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  86M Sep 14 02:37 gen-word-4500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  88M Sep 14 02:37 gen-word-4600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  90M Sep 14 02:37 gen-word-4700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  92M Sep 14 02:37 gen-word-4800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  94M Sep 14 02:37 gen-word-4900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  15K Sep 14 02:37 gen-word-5-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  57K Sep 14 02:37 gen-word-50-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 9.7M Sep 14 02:37 gen-word-500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  96M Sep 14 02:37 gen-word-5000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  97M Sep 14 02:37 gen-word-5100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  99M Sep 14 02:37 gen-word-5200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 101M Sep 14 02:37 gen-word-5300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 103M Sep 14 02:37 gen-word-5400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  62K Sep 14 02:37 gen-word-55-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 105M Sep 14 02:37 gen-word-5500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 107M Sep 14 02:37 gen-word-5600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 109M Sep 14 02:37 gen-word-5700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 111M Sep 14 02:37 gen-word-5800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 113M Sep 14 02:37 gen-word-5900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  68K Sep 14 02:37 gen-word-60-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  12M Sep 14 02:37 gen-word-600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 115M Sep 14 02:37 gen-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 117M Sep 14 02:37 gen-word-6100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 118M Sep 14 02:37 gen-word-6200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 120M Sep 14 02:37 gen-word-6300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 122M Sep 14 02:37 gen-word-6400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  73K Sep 14 02:37 gen-word-65-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 124M Sep 14 02:37 gen-word-6500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 126M Sep 14 02:37 gen-word-6600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 128M Sep 14 02:37 gen-word-6700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 130M Sep 14 02:37 gen-word-6800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 132M Sep 14 02:37 gen-word-6900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  79K Sep 14 02:37 gen-word-70-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  14M Sep 14 02:37 gen-word-700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 134M Sep 14 02:37 gen-word-7000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 136M Sep 14 02:37 gen-word-7100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 137M Sep 14 02:37 gen-word-7200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 139M Sep 14 02:37 gen-word-7300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 141M Sep 14 02:37 gen-word-7400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  83K Sep 14 02:37 gen-word-75-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 143M Sep 14 02:37 gen-word-7500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 145M Sep 14 02:37 gen-word-7600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 147M Sep 14 02:37 gen-word-7700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 149M Sep 14 02:37 gen-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 151M Sep 14 02:37 gen-word-7900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  86K Sep 14 02:37 gen-word-80-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  16M Sep 14 02:37 gen-word-800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 153M Sep 14 02:37 gen-word-8000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  88K Sep 14 02:37 gen-word-85-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 101K Sep 14 02:37 gen-word-90-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  18M Sep 14 02:37 gen-word-900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 101K Sep 14 02:37 gen-word-95-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 500K Sep 14 02:37 shuffle-word-10-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 281K Sep 14 02:37 shuffle-word-100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 525K Sep 14 02:37 shuffle-word-1000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 522K Sep 14 02:37 shuffle-word-1200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 524K Sep 14 02:37 shuffle-word-1300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 432K Sep 14 02:37 shuffle-word-15-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 522K Sep 14 02:37 shuffle-word-1500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 521K Sep 14 02:37 shuffle-word-1600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-1700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-1900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 393K Sep 14 02:37 shuffle-word-20-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 540K Sep 14 02:37 shuffle-word-200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-2000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-2100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-2200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 521K Sep 14 02:37 shuffle-word-2300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 517K Sep 14 02:37 shuffle-word-2400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 357K Sep 14 02:37 shuffle-word-25-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-2500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 512K Sep 14 02:37 shuffle-word-2600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 510K Sep 14 02:37 shuffle-word-2700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-2800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-2900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 334K Sep 14 02:37 shuffle-word-30-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 534K Sep 14 02:37 shuffle-word-300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-3000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 322K Sep 14 02:37 shuffle-word-35-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-3500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 314K Sep 14 02:37 shuffle-word-40-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 527K Sep 14 02:37 shuffle-word-400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-4300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 317K Sep 14 02:37 shuffle-word-45-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 822K Sep 14 02:37 shuffle-word-5-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 311K Sep 14 02:37 shuffle-word-50-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 526K Sep 14 02:37 shuffle-word-500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 301K Sep 14 02:37 shuffle-word-55-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 300K Sep 14 02:37 shuffle-word-60-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 527K Sep 14 02:37 shuffle-word-600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-6200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 297K Sep 14 02:37 shuffle-word-65-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-6500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 297K Sep 14 02:37 shuffle-word-70-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 530K Sep 14 02:37 shuffle-word-700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 284K Sep 14 02:37 shuffle-word-75-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-7900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 291K Sep 14 02:37 shuffle-word-80-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 526K Sep 14 02:37 shuffle-word-800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-8000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 283K Sep 14 02:37 shuffle-word-85-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 281K Sep 14 02:37 shuffle-word-90-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 524K Sep 14 02:37 shuffle-word-900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 286K Sep 14 02:37 shuffle-word-95-count.jsonl\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%script bash\n",
-    "\n",
-    "########################################\n",
-    "# Generate the required jsonl dataset\n",
-    "########################################\n",
-    "\n",
-    "# Go to config dir\n",
-    "cd \"../\"\n",
-    "\n",
-    "# Reset the dataset dir\n",
-    "mkdir -p ../dataset\n",
-    "rm -rf ../dataset/*.jsonl\n",
-    "\n",
-    "# Generate the various datasets\n",
-    "echo \"## Generating word reptition dataset ##\"\n",
-    "\n",
-    "#\n",
-    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
-    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
-    "#\n",
-    "for i in {5..100..5} \n",
-    "do\n",
-    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
-    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n",
-    "done\n",
-    "\n",
-    "#\n",
-    "# Ramping up the 100+ - 4200 words dataset\n",
-    "# \n",
-    "for i in {100..8000..100} \n",
-    "do\n",
-    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
-    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
-    "done\n",
-    "\n",
-    "wait\n",
-    "echo \"## Done ##\"\n",
-    "\n",
-    "ls -lh ../dataset/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "af9b83d3",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-14T02:37:52.652229Z",
-     "iopub.status.busy": "2023-09-14T02:37:52.651608Z",
-     "iopub.status.idle": "2023-09-14T02:38:21.558685Z",
-     "shell.execute_reply": "2023-09-14T02:38:21.557835Z"
-    },
-    "papermill": {
-     "duration": 28.945501,
-     "end_time": "2023-09-14T02:38:21.561935",
-     "exception": false,
-     "start_time": "2023-09-14T02:37:52.616434",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-09-14 02:37:56,909] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth'].\r\n",
-      "  rank_zero_warn(\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1547623296\r\n",
-      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
-      "Global seed set to 1547623296\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.10\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230914_023759-9o2jwwvs\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/9o2jwwvs\u001b[0m\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:554: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\r\n",
-      "  rank_zero_warn(\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "GPU available: True (cuda), used: True\r\n",
-      "TPU available: False, using: 0 TPU cores\r\n",
-      "IPU available: False, using: 0 IPUs\r\n",
-      "HPU available: False, using: 0 HPUs\r\n",
-      "\r\n",
-      "\r\n",
-      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n",
-      "   - target_batch_size:       256\r\n",
-      "   - num_nodes:               1\r\n",
-      "   - num_devices:             1\r\n",
-      "   - accumulate_grad_batches: 256\r\n",
-      "   - effective_batch_size:    256\r\n",
-      "\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Resolving data files:   0%|                             | 0/198 [00:00<?, ?it/s]\r",
-      "Resolving data files: 100%|███████████████| 198/198 [00:00<00:00, 176959.77it/s]\r\n",
-      "\r",
-      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 279.12it/s]\r\n",
-      "\r",
-      "Extracting data files:   0%|                              | 0/1 [00:00<?, ?it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00, 16.15it/s]\r\n",
-      "\r",
-      "Generating train split: 0 examples [00:00, ? examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Failed to read file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset/gen-word-100-count.jsonl' with error <class 'pyarrow.lib.ArrowInvalid'>: JSON parse error: Missing a comma or '}' after an object member. in row 27\r\n",
-      "\r",
-      "Generating train split: 100 examples [00:01, 58.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 4119 examples [00:01, 3148.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 9991 examples [00:01, 8377.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 16136 examples [00:02, 14631.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 21292 examples [00:02, 19123.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 42124 examples [00:02, 48567.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 50633 examples [00:02, 48135.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 59623 examples [00:02, 55584.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 67337 examples [00:02, 53321.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 74360 examples [00:02, 50060.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 80571 examples [00:03, 46642.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 85925 examples [00:03, 46695.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 91087 examples [00:03, 42287.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 95851 examples [00:03, 41277.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 100328 examples [00:03, 41903.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 105109 examples [00:03, 43330.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 109873 examples [00:03, 38887.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 114106 examples [00:03, 38800.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 118536 examples [00:04, 40082.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 122694 examples [00:04, 38268.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 127281 examples [00:04, 38358.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 131204 examples [00:04, 37584.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 135020 examples [00:04, 32997.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 139737 examples [00:04, 35623.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 143828 examples [00:04, 36646.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 147603 examples [00:04, 34959.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 151330 examples [00:05, 28703.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 154519 examples [00:05, 26946.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 157445 examples [00:05, 25877.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 160186 examples [00:05, 24367.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 162774 examples [00:05, 23524.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 165326 examples [00:05, 21662.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 167801 examples [00:05, 18798.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 169818 examples [00:06, 17953.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 171755 examples [00:06, 11617.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 171974 examples [00:06, 26184.81 examples/s]\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "multiprocess.pool.RemoteTraceback: \r\n",
-      "\"\"\"\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 144, in _generate_tables\r\n",
-      "    dataset = json.load(f)\r\n",
-      "  File \"/usr/lib/python3.10/json/__init__.py\", line 293, in load\r\n",
-      "    return loads(fp.read(),\r\n",
-      "  File \"/usr/lib/python3.10/json/__init__.py\", line 346, in loads\r\n",
-      "    return _default_decoder.decode(s)\r\n",
-      "  File \"/usr/lib/python3.10/json/decoder.py\", line 340, in decode\r\n",
-      "    raise JSONDecodeError(\"Extra data\", s, end)\r\n",
-      "json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 1231)\r\n",
-      "\r\n",
-      "During handling of the above exception, another exception occurred:\r\n",
-      "\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1925, in _prepare_split_single\r\n",
-      "    for _, table in generator:\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 147, in _generate_tables\r\n",
-      "    raise e\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 121, in _generate_tables\r\n",
-      "    pa_table = paj.read_json(\r\n",
-      "  File \"pyarrow/_json.pyx\", line 258, in pyarrow._json.read_json\r\n",
-      "  File \"pyarrow/error.pxi\", line 144, in pyarrow.lib.pyarrow_internal_check_status\r\n",
-      "  File \"pyarrow/error.pxi\", line 100, in pyarrow.lib.check_status\r\n",
-      "pyarrow.lib.ArrowInvalid: JSON parse error: Missing a comma or '}' after an object member. in row 27\r\n",
-      "\r\n",
-      "The above exception was the direct cause of the following exception:\r\n",
-      "\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 125, in worker\r\n",
-      "    result = (True, func(*args, **kwds))\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1347, in _write_generator_to_queue\r\n",
-      "    for i, result in enumerate(func(**kwargs)):\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1958, in _prepare_split_single\r\n",
-      "    raise DatasetGenerationError(\"An error occurred while generating the dataset\") from e\r\n",
-      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n",
-      "\"\"\"\r\n",
-      "\r\n",
-      "The above exception was the direct cause of the following exception:\r\n",
-      "\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 258, in <module>\r\n",
-      "    cli_main()\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n",
-      "    LightningCLI(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n",
-      "    self._run_subcommand(self.subcommand)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n",
-      "    fn(**fn_kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n",
-      "    call._call_and_handle_interrupt(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n",
-      "    return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n",
-      "    return function(*args, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n",
-      "    self._run(model, ckpt_path=ckpt_path)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n",
-      "    self._data_connector.prepare_data()\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n",
-      "    call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n",
-      "    return fn(*args, **kwargs)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 549, in prepare_data\r\n",
-      "    prepare_data_static(**self._init_locals)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n",
-      "    src_dataset = load_dataset(**load_dataset_params)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2136, in load_dataset\r\n",
-      "    builder_instance.download_and_prepare(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 954, in download_and_prepare\r\n",
-      "    self._download_and_prepare(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1049, in _download_and_prepare\r\n",
-      "    self._prepare_split(split_generator, **prepare_split_kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1842, in _prepare_split\r\n",
-      "    for job_id, done, content in iflatmap_unordered(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in iflatmap_unordered\r\n",
-      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in <listcomp>\r\n",
-      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 774, in get\r\n",
-      "    raise self._value\r\n",
-      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: - 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: \\ 0.005 MB of 0.016 MB uploaded (0.000 MB deduped)\r"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.005 MB of 0.016 MB uploaded (0.000 MB deduped)\r"
-     ]
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Go to config dir\n",
+    "cd \"../\"\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
+    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
+    "#\n",
+    "for i in {5..95..5} \n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 100 - 8000 words dataset\n",
+    "# \n",
+    "for i in {100..8000..100} \n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -lh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40beda36",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/9o2jwwvs\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v54\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230914_023759-9o2jwwvs/logs\u001b[0m\r\n"
-     ]
-    }
-   ],
+    "tags": []
+   },
+   "outputs": [],
    "source": [
     "# Start the finetune model training\n",
     "!cd \"{TRAINER_DIR}\" && \\\n",
@@ -4481,54 +2352,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "6db19b87",
+   "execution_count": null,
+   "id": "19eaf20f",
    "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-14T02:38:21.728358Z",
-     "iopub.status.busy": "2023-09-14T02:38:21.727768Z",
-     "iopub.status.idle": "2023-09-14T02:38:25.677206Z",
-     "shell.execute_reply": "2023-09-14T02:38:25.676074Z"
-    },
     "papermill": {
-     "duration": 3.986198,
-     "end_time": "2023-09-14T02:38:25.679730",
-     "exception": false,
-     "start_time": "2023-09-14T02:38:21.693532",
-     "status": "completed"
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-09-14 02:38:24,136] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in <module>\r\n",
-      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
-      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
-      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
-      "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/last.ckpt/latest\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ls: cannot access '../model/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth': No such file or directory\r\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Lets export the model from the checkpoint\n",
     "!cd \"{TRAINER_DIR}\" && \\\n",
@@ -4540,33 +2376,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "ec391cb3",
+   "execution_count": null,
+   "id": "e9be9d4f",
    "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-14T02:38:25.748202Z",
-     "iopub.status.busy": "2023-09-14T02:38:25.747247Z",
-     "iopub.status.idle": "2023-09-14T02:38:26.013713Z",
-     "shell.execute_reply": "2023-09-14T02:38:26.012573Z"
-    },
     "papermill": {
-     "duration": 0.303358,
-     "end_time": "2023-09-14T02:38:26.016073",
-     "exception": false,
-     "start_time": "2023-09-14T02:38:25.712715",
-     "status": "completed"
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Lets do a quick memory test\n",
     "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
@@ -4574,33 +2396,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "id": "2748101d",
+   "execution_count": null,
+   "id": "1449d8ff",
    "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-14T02:38:26.085118Z",
-     "iopub.status.busy": "2023-09-14T02:38:26.084169Z",
-     "iopub.status.idle": "2023-09-14T02:38:26.352535Z",
-     "shell.execute_reply": "2023-09-14T02:38:26.351341Z"
-    },
     "papermill": {
-     "duration": 0.305573,
-     "end_time": "2023-09-14T02:38:26.354898",
-     "exception": false,
-     "start_time": "2023-09-14T02:38:26.049325",
-     "status": "completed"
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000"
    ]
@@ -4622,18 +2430,18 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.4"
   },
   "papermill": {
    "default_parameters": {},
-   "duration": 83.790874,
-   "end_time": "2023-09-14T02:38:26.808961",
+   "duration": null,
+   "end_time": null,
    "environment_variables": {},
    "exception": null,
    "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb",
    "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb",
    "parameters": {},
-   "start_time": "2023-09-14T02:37:03.018087",
+   "start_time": "2023-09-14T04:09:41.695596",
    "version": "2.4.0"
   }
  },