diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb" --- "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb" +++ "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb" @@ -1,3 +1,4642 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0439ce7fb6866af36cb53bbddaf6a1ed49656c85a84d6a2aabd6754b30fa2109 -size 61159745 +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e6bf5eac", + "metadata": { + "papermill": { + "duration": 0.005682, + "end_time": "2023-09-14T02:37:04.293470", + "exception": false, + "start_time": "2023-09-14T02:37:04.287788", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f59fa274", + "metadata": { + "papermill": { + "duration": 0.003026, + "end_time": "2023-09-14T02:37:04.300149", + "exception": false, + "start_time": "2023-09-14T02:37:04.297123", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b9505f51", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:37:04.308476Z", + "iopub.status.busy": "2023-09-14T02:37:04.307936Z", + "iopub.status.idle": "2023-09-14T02:37:05.309488Z", + "shell.execute_reply": "2023-09-14T02:37:05.308127Z" + }, + "papermill": { + "duration": 1.008439, + "end_time": "2023-09-14T02:37:05.311918", + "exception": false, + "start_time": "2023-09-14T02:37:04.303479", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8d16737a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:37:05.320892Z", + "iopub.status.busy": "2023-09-14T02:37:05.319751Z", + "iopub.status.idle": "2023-09-14T02:37:08.625564Z", + "shell.execute_reply": "2023-09-14T02:37:08.624420Z" + }, + "papermill": { + "duration": 3.312981, + "end_time": "2023-09-14T02:37:08.627991", + "exception": false, + "start_time": "2023-09-14T02:37:05.315010", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip3 install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "157915c9", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:37:08.636451Z", + "iopub.status.busy": "2023-09-14T02:37:08.635808Z", + "iopub.status.idle": "2023-09-14T02:37:08.646285Z", + "shell.execute_reply": "2023-09-14T02:37:08.644957Z" + }, + "papermill": { + "duration": 0.017165, + "end_time": "2023-09-14T02:37:08.648279", + "exception": false, + "start_time": "2023-09-14T02:37:08.631114", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L6-D2048-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=6\n", + "EMBED_DIM=2048\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ed6bf7ff", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:37:08.656415Z", + "iopub.status.busy": "2023-09-14T02:37:08.655852Z", + "iopub.status.idle": "2023-09-14T02:37:32.430048Z", + "shell.execute_reply": "2023-09-14T02:37:32.428850Z" + }, + "papermill": { + "duration": 23.781188, + "end_time": "2023-09-14T02:37:32.432519", + "exception": false, + "start_time": "2023-09-14T02:37:08.651331", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"--2023-09-14 02:37:08-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.87, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n", + "HTTP request sent, awaiting response... 302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694918228&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxODIyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=cJAoUY5y0W5uDSWebon3c0434JSN%7EgNHU8QPvHA1bl1fW7kXK0ETDur-X-85BKCXlTHdhzjGRWoxRQGb33uDdG35IvOOksMyaVFYfnyj0JA66Bh9q%7E35mFanEks9Ja7QfTFOyrfWlndyFOT0M5Hzx-rJQ-nLDBne1LfEZEwxt7Uv2jsFCYkukWDP1f-OwfqwTb1q4Ys7knlGyj1ZQ4sq45v6cFcJAXU8R8GUhEd5j8vg9bnxtYKZvYqJuZcX8T1w%7EQJ5DJK0l9lYIY0JIiqZr4tCNkjD6PbTvnVA7E8TQys0Hjgf0o291i9ruANc6bwjWcGOpPeBo4QI24aWO9Fxlg__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-14 02:37:08-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694918228&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxODIyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=cJAoUY5y0W5uDSWebon3c0434JSN%7EgNHU8QPvHA1bl1fW7kXK0ETDur-X-85BKCXlTHdhzjGRWoxRQGb33uDdG35IvOOksMyaVFYfnyj0JA66Bh9q%7E35mFanEks9Ja7QfTFOyrfWlndyFOT0M5Hzx-rJQ-nLDBne1LfEZEwxt7Uv2jsFCYkukWDP1f-OwfqwTb1q4Ys7knlGyj1ZQ4sq45v6cFcJAXU8R8GUhEd5j8vg9bnxtYKZvYqJuZcX8T1w%7EQJ5DJK0l9lYIY0JIiqZr4tCNkjD6PbTvnVA7E8TQys0Hjgf0o291i9ruANc6bwjWcGOpPeBo4QI24aWO9Fxlg__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "108.138.64.49, 108.138.64.121, 108.138.64.111, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.138.64.49|:443... connected.\r\n", + "HTTP request sent, awaiting response... 
" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "200 OK\r\n", + "Length: 1066537217 (1017M) [binary/octet-stream]\r\n", + "Saving to: ‘v5r3-L6-D2048-E0_1-mem-ctx-512.pth’\r\n", + "\r\n", + "\r", + " v5r3-L6-D 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 1%[ ] 14.74M 63.3MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D20 2%[ ] 24.19M 47.2MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D204 3%[ ] 30.52M 38.2MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048 4%[ ] 45.26M 42.8MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048- 5%[> ] 54.89M 43.6MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E 6%[> ] 61.03M 41.1MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0 7%[> ] 75.78M 43.5MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_ 8%[> ] 87.34M 44.9MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_1 9%[> ] 91.55M 41.4MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2048-E0_1- 10%[=> ] 106.29M 43.3MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2048-E0_1-m 11%[=> ] 114.75M 43.2MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2048-E0_1-me 11%[=> ] 121.56M 42.5MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2048-E0_1-mem 12%[=> ] 122.07M 39.8MB/s eta 22s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2048-E0_1-mem- 13%[=> ] 137.33M 42.0MB/s eta 22s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2048-E0_1-mem-c 14%[=> ] 152.07M 43.6MB/s eta 22s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2048-E0_1-mem-ct 15%[==> ] 152.59M 41.2MB/s eta 22s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-mem-ctx 16%[==> ] 167.33M 41.1MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-mem-ctx- 16%[==> ] 167.85M 37.9MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-mem-ctx-5 17%[==> ] 181.27M 39.7MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-mem-ctx-51 18%[==> ] 183.10M 37.2MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-mem-ctx-512 19%[==> ] 198.36M 38.0MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-mem-ctx-512. 
20%[===> ] 213.11M 40.5MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 22%[===> ] 228.36M 41.5MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 23%[===> ] 243.09M 42.1MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 24%[===> ] 254.13M 44.3MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 25%[====> ] 259.40M 41.5MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 27%[====> ] 274.66M 42.2MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 28%[====> ] 289.40M 44.8MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 29%[====> ] 298.47M 43.8MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 30%[=====> ] 305.18M 42.3MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 30%[=====> ] 313.62M 39.8MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 32%[=====> ] 333.86M 42.5MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 33%[=====> ] 341.22M 44.0MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 34%[=====> ] 350.95M 45.4MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 35%[======> ] 365.70M 44.4MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 36%[======> ] 371.37M 45.7MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 37%[======> ] 381.47M 44.0MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 39%[======> ] 396.73M 45.9MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 41%[=======> ] 419.01M 47.8MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 42%[=======> ] 427.25M 47.9MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 43%[=======> ] 442.51M 47.9MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 45%[========> ] 457.76M 46.8MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 46%[========> ] 473.02M 47.8MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 48%[========> ] 488.28M 47.6MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 49%[========> ] 507.63M 50.4MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 50%[=========> ] 512.48M 51.1MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 52%[=========> ] 532.75M 54.5MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 52%[=========> ] 534.05M 51.4MB/s eta 11s " + ] + }, + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 53%[=========> ] 547.49M 52.6MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 54%[=========> ] 557.13M 52.7MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 56%[==========> ] 569.63M 54.0MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 56%[==========> ] 579.31M 55.5MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 58%[==========> ] 594.57M 54.4MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 59%[==========> ] 600.21M 54.9MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D20 59%[==========> ] 602.76M 51.4MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D204 60%[===========> ] 610.35M 48.1MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048 61%[===========> ] 625.09M 48.0MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048- 62%[===========> ] 640.36M 48.3MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E 64%[===========> ] 655.62M 47.5MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0 64%[===========> ] 656.25M 43.3MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_ 65%[============> ] 669.55M 42.5MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_1 66%[============> ] 678.94M 43.2MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2048-E0_1- 67%[============> ] 686.64M 42.0MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2048-E0_1-m 69%[============> ] 701.90M 42.1MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2048-E0_1-me 70%[=============> ] 716.64M 42.5MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2048-E0_1-mem 70%[=============> ] 717.16M 41.4MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2048-E0_1-mem- 71%[=============> ] 724.85M 39.7MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2048-E0_1-mem-c 72%[=============> ] 732.42M 38.1MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2048-E0_1-mem-ct 73%[=============> ] 747.69M 38.4MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-mem-ctx 75%[==============> ] 762.94M 40.7MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-mem-ctx- 76%[==============> ] 777.68M 43.3MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-mem-ctx-5 77%[==============> ] 792.94M 42.8MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + 
"048-E0_1-mem-ctx-51 78%[==============> ] 793.46M 39.6MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-mem-ctx-512 79%[==============> ] 808.20M 41.4MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-mem-ctx-512. 79%[==============> ] 812.25M 39.4MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 81%[===============> ] 823.97M 40.9MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 82%[===============> ] 838.71M 42.4MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 82%[===============> ] 839.35M 41.6MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 85%[================> ] 866.03M 44.5MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 85%[================> ] 869.75M 42.1MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 87%[================> ] 885.01M 42.8MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 87%[================> ] 893.44M 44.4MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 88%[================> ] 900.27M 43.8MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 89%[================> ] 911.78M 42.6MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 90%[=================> ] 915.53M 41.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 91%[=================> ] 930.78M 40.9MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 92%[=================> ] 944.21M 41.4MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 94%[=================> ] 956.92M 42.2MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 94%[=================> ] 961.30M 42.7MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 96%[==================> ] 980.99M 44.8MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 97%[==================> ] 986.93M 42.2MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 98%[==================> ] 1004M 44.5MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 99%[==================> ] 1007M 42.6MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2048-E0_1- 100%[===================>] 1017M 45.2MB/s in 23s \r\n", + "\r\n", + "2023-09-14 02:37:32 (44.1 MB/s) - ‘v5r3-L6-D2048-E0_1-mem-ctx-512.pth’ saved [1066537217/1066537217]\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1018M\r\n", + "drwxr-xr-x 2 root root 4.0K Sep 14 02:37 .\r\n", + "drwxr-xr-x 20 root root 4.0K Sep 14 02:37 ..\r\n", + "-rw-r--r-- 1 root root 1018M Sep 13 20:28 
v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n" + ] + } + ], + "source": [ + "# Download the model directly (stopgap until the HF sync issue is resolved)\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n", + "\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " ls -alh ." + ] + }, + { + "cell_type": "markdown", + "id": "5700b4b5", + "metadata": { + "papermill": { + "duration": 0.008064, + "end_time": "2023-09-14T02:37:32.449084", + "exception": false, + "start_time": "2023-09-14T02:37:32.441020", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Tune 3: Ramping up the ctx size (8192), memory training\n", + "\n", + "- Tune 3: Large ctx size (8192), scaling up!" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d5f911bc", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:37:32.468291Z", + "iopub.status.busy": "2023-09-14T02:37:32.467681Z", + "iopub.status.idle": "2023-09-14T02:37:52.531727Z", + "shell.execute_reply": "2023-09-14T02:37:52.530635Z" + }, + "papermill": { + "duration": 20.125122, + "end_time": "2023-09-14T02:37:52.582572", + "exception": false, + "start_time": "2023-09-14T02:37:32.457450", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word repetition dataset ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 374 samples (10 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 869 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 528 samples (10 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 586 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 1060 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 747 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 1301 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 100 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 650 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 81 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 1794 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 529 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 328 samples (10 token repeat) - 80 max words - at 
../dataset/shuffle-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 352 samples (10 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 293 samples (10 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 26 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 313 samples (10 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 264 samples (10 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 184 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 39 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 439 samples (10 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) 
- 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 401 samples (10 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 280 samples (10 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 2607 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 21 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 482 samples (10 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 267 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file 
with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 117 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at 
../dataset/shuffle-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 48 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 5563 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 140 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 63 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 80 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 100 max words, 2000 samples - at ../dataset/gen-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 200 max words, 2000 samples - at ../dataset/gen-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 300 max words, 2000 samples - at ../dataset/gen-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 700 max words, 2000 samples - at ../dataset/gen-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 400 max words, 2000 samples - at ../dataset/gen-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 500 max words, 2000 samples - at ../dataset/gen-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 600 max words, 2000 samples - at ../dataset/gen-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1000 max words, 2000 samples - at ../dataset/gen-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 800 max words, 2000 samples - at ../dataset/gen-word-800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 900 max words, 2000 samples - at ../dataset/gen-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1400 max words, 2000 samples - at 
../dataset/gen-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6800 max words, 2000 samples - at 
../dataset/gen-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7500 max words, 2000 samples - at 
../dataset/gen-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7600 max words, 2000 samples - at 
../dataset/gen-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 6.1G\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 21K Sep 14 02:37 gen-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 2.1M Sep 14 02:37 gen-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 20M Sep 14 02:37 gen-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 22M Sep 14 02:37 gen-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 23M Sep 14 02:37 gen-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 25M Sep 14 02:37 gen-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 27M Sep 14 02:37 gen-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 24K Sep 14 02:37 gen-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 29M Sep 14 02:37 gen-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 31M Sep 14 02:37 gen-word-1600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 33M Sep 14 02:37 gen-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 35M Sep 14 02:37 gen-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 37M Sep 14 02:37 gen-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 30K Sep 14 02:37 gen-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 4.0M Sep 14 02:37 gen-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 39M Sep 14 02:37 gen-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 41M Sep 14 02:37 gen-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 42M Sep 14 02:37 gen-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 44M Sep 14 02:37 gen-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"-rw-r--r-- 1 root root 46M Sep 14 02:37 gen-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 35K Sep 14 02:37 gen-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 48M Sep 14 02:37 gen-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 50M Sep 14 02:37 gen-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 52M Sep 14 02:37 gen-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 54M Sep 14 02:37 gen-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 56M Sep 14 02:37 gen-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 39K Sep 14 02:37 gen-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 5.9M Sep 14 02:37 gen-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 58M Sep 14 02:37 gen-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 60M Sep 14 02:37 gen-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 61M Sep 14 02:37 gen-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 63M Sep 14 02:37 gen-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 65M Sep 14 02:37 gen-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 45K Sep 14 02:37 gen-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 67M Sep 14 02:37 gen-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 69M Sep 14 02:37 gen-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 71M Sep 14 02:37 gen-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 73M Sep 14 02:37 gen-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 75M Sep 14 02:37 gen-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 49K Sep 14 02:37 gen-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 7.8M Sep 14 02:37 gen-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 77M Sep 14 02:37 gen-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 79M Sep 14 02:37 gen-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 80M Sep 14 02:37 gen-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 82M Sep 14 02:37 gen-word-4300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 84M Sep 14 02:37 gen-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 54K Sep 14 02:37 gen-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 86M Sep 14 02:37 gen-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 88M Sep 14 02:37 gen-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 90M Sep 14 02:37 gen-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 92M Sep 14 02:37 gen-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 94M Sep 14 02:37 gen-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 15K Sep 14 02:37 gen-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 57K Sep 14 02:37 gen-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 9.7M Sep 14 02:37 gen-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 96M Sep 14 02:37 gen-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 97M Sep 14 02:37 gen-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 99M Sep 14 02:37 gen-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 101M Sep 14 02:37 gen-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 103M Sep 14 02:37 gen-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 62K Sep 14 02:37 gen-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 105M Sep 14 02:37 gen-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 107M Sep 14 02:37 gen-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 109M Sep 14 02:37 gen-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 111M Sep 14 02:37 gen-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 113M Sep 14 02:37 gen-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 68K Sep 14 02:37 gen-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 12M Sep 14 02:37 gen-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 115M Sep 14 02:37 
gen-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 117M Sep 14 02:37 gen-word-6100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 118M Sep 14 02:37 gen-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 120M Sep 14 02:37 gen-word-6300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 122M Sep 14 02:37 gen-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 73K Sep 14 02:37 gen-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 124M Sep 14 02:37 gen-word-6500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 126M Sep 14 02:37 gen-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 128M Sep 14 02:37 gen-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 130M Sep 14 02:37 gen-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 132M Sep 14 02:37 gen-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 79K Sep 14 02:37 gen-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 14M Sep 14 02:37 gen-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 134M Sep 14 02:37 gen-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 136M Sep 14 02:37 gen-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 137M Sep 14 02:37 gen-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 139M Sep 14 02:37 gen-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 141M Sep 14 02:37 gen-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 83K Sep 14 02:37 gen-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 143M Sep 14 02:37 gen-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 145M Sep 14 02:37 gen-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 147M Sep 14 02:37 gen-word-7700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 149M Sep 14 02:37 gen-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 151M Sep 14 02:37 gen-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 86K Sep 14 02:37 gen-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", 
+ "text": [ + "-rw-r--r-- 1 root root 16M Sep 14 02:37 gen-word-800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 153M Sep 14 02:37 gen-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 88K Sep 14 02:37 gen-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 101K Sep 14 02:37 gen-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 18M Sep 14 02:37 gen-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 101K Sep 14 02:37 gen-word-95-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 500K Sep 14 02:37 shuffle-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 281K Sep 14 02:37 shuffle-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 525K Sep 14 02:37 shuffle-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 02:37 shuffle-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 524K Sep 14 02:37 shuffle-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 432K Sep 14 02:37 shuffle-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 02:37 shuffle-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 521K Sep 14 02:37 shuffle-word-1600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 393K Sep 14 02:37 shuffle-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 540K Sep 14 02:37 shuffle-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"-rw-r--r-- 1 root root 521K Sep 14 02:37 shuffle-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 517K Sep 14 02:37 shuffle-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 357K Sep 14 02:37 shuffle-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 512K Sep 14 02:37 shuffle-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 510K Sep 14 02:37 shuffle-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 334K Sep 14 02:37 shuffle-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 534K Sep 14 02:37 shuffle-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 322K Sep 14 02:37 shuffle-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 314K Sep 14 02:37 shuffle-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 527K Sep 14 02:37 shuffle-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-4300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 317K Sep 14 02:37 shuffle-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 822K Sep 14 02:37 shuffle-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 311K Sep 14 02:37 shuffle-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 526K Sep 14 02:37 shuffle-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 301K Sep 14 02:37 shuffle-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "-rw-r--r-- 1 root root 300K Sep 14 02:37 shuffle-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 527K Sep 14 02:37 shuffle-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 297K Sep 14 02:37 shuffle-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-6500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 297K Sep 14 02:37 shuffle-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 530K Sep 14 02:37 shuffle-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 284K Sep 14 02:37 shuffle-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7700-count.jsonl\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 291K Sep 14 02:37 shuffle-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 526K Sep 14 02:37 shuffle-word-800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 283K Sep 14 02:37 shuffle-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 281K Sep 14 02:37 shuffle-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 524K Sep 14 02:37 shuffle-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 286K Sep 14 02:37 shuffle-word-95-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Go to config dir\n", + "cd \"../\"\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ../dataset\n", + "rm -rf ../dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# We reduce the training set for < 50 words - and shift the focus upwards\n", + "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n", + "#\n", + "for i in {5..100..5} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ - 4200 words dataset\n", + "# \n", + "for i in {100..8000..100} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n", + " python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -lh ../dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "af9b83d3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:37:52.652229Z", + "iopub.status.busy": "2023-09-14T02:37:52.651608Z", + "iopub.status.idle": "2023-09-14T02:38:21.558685Z", + "shell.execute_reply": "2023-09-14T02:38:21.557835Z" + }, + "papermill": { + "duration": 28.945501, + "end_time": "2023-09-14T02:38:21.561935", + "exception": false, + "start_time": "2023-09-14T02:37:52.616434", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-14 02:37:56,909] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch 
'2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1547623296\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 1547623296\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.10\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230914_023759-9o2jwwvs\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/9o2jwwvs\u001b[0m\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:554: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. 
Please set your precision to bf16-mixed instead!\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\r\n", + "TPU available: False, using: 0 TPU cores\r\n", + "IPU available: False, using: 0 IPUs\r\n", + "HPU available: False, using: 0 HPUs\r\n", + "\r\n", + "\r\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n", + " - target_batch_size: 256\r\n", + " - num_nodes: 1\r\n", + " - num_devices: 1\r\n", + " - accumulate_grad_batches: 256\r\n", + " - effective_batch_size: 256\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Resolving data files: 0%| | 0/198 [00:00: JSON parse error: Missing a comma or '}' after an object member. in row 27\r\n", + "\r", + "Generating train split: 100 examples [00:01, 58.74 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 4119 examples [00:01, 3148.90 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 9991 examples [00:01, 8377.41 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 16136 examples [00:02, 14631.65 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 21292 examples [00:02, 19123.98 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 42124 examples [00:02, 48567.56 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 50633 examples [00:02, 48135.15 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 59623 examples [00:02, 55584.20 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 67337 examples [00:02, 53321.94 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 74360 examples [00:02, 50060.00 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 80571 examples [00:03, 46642.17 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 85925 examples [00:03, 46695.96 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 91087 examples [00:03, 42287.60 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 95851 examples [00:03, 41277.70 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 100328 examples [00:03, 41903.98 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 105109 examples [00:03, 43330.01 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 109873 examples [00:03, 38887.49 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 114106 examples [00:03, 38800.38 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + 
"Generating train split: 118536 examples [00:04, 40082.20 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 122694 examples [00:04, 38268.69 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 127281 examples [00:04, 38358.03 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 131204 examples [00:04, 37584.81 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 135020 examples [00:04, 32997.15 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 139737 examples [00:04, 35623.56 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 143828 examples [00:04, 36646.73 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 147603 examples [00:04, 34959.40 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 151330 examples [00:05, 28703.38 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 154519 examples [00:05, 26946.66 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 157445 examples [00:05, 25877.83 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 160186 examples [00:05, 24367.50 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 162774 examples [00:05, 23524.07 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 165326 examples [00:05, 21662.50 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 167801 examples [00:05, 18798.65 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 169818 examples [00:06, 17953.36 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 171755 examples [00:06, 11617.64 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 171974 examples [00:06, 26184.81 examples/s]\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "multiprocess.pool.RemoteTraceback: \r\n", + "\"\"\"\r\n", + "Traceback (most recent call last):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 144, in _generate_tables\r\n", + " dataset = json.load(f)\r\n", + " File \"/usr/lib/python3.10/json/__init__.py\", line 293, in load\r\n", + " return loads(fp.read(),\r\n", + " File \"/usr/lib/python3.10/json/__init__.py\", line 346, in loads\r\n", + " return _default_decoder.decode(s)\r\n", + " File \"/usr/lib/python3.10/json/decoder.py\", line 340, in decode\r\n", + " raise JSONDecodeError(\"Extra data\", s, end)\r\n", + "json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 1231)\r\n", + "\r\n", + "During handling of the above exception, another exception occurred:\r\n", + "\r\n", + 
"Traceback (most recent call last):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1925, in _prepare_split_single\r\n", + " for _, table in generator:\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 147, in _generate_tables\r\n", + " raise e\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 121, in _generate_tables\r\n", + " pa_table = paj.read_json(\r\n", + " File \"pyarrow/_json.pyx\", line 258, in pyarrow._json.read_json\r\n", + " File \"pyarrow/error.pxi\", line 144, in pyarrow.lib.pyarrow_internal_check_status\r\n", + " File \"pyarrow/error.pxi\", line 100, in pyarrow.lib.check_status\r\n", + "pyarrow.lib.ArrowInvalid: JSON parse error: Missing a comma or '}' after an object member. in row 27\r\n", + "\r\n", + "The above exception was the direct cause of the following exception:\r\n", + "\r\n", + "Traceback (most recent call last):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 125, in worker\r\n", + " result = (True, func(*args, **kwds))\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1347, in _write_generator_to_queue\r\n", + " for i, result in enumerate(func(**kwargs)):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1958, in _prepare_split_single\r\n", + " raise DatasetGenerationError(\"An error occurred while generating the dataset\") from e\r\n", + "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n", + "\"\"\"\r\n", + "\r\n", + "The above exception was the direct cause of the following exception:\r\n", + "\r\n", + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 258, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File 
\"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 549, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2136, in load_dataset\r\n", + " builder_instance.download_and_prepare(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 954, in download_and_prepare\r\n", + " self._download_and_prepare(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1049, in _download_and_prepare\r\n", + " self._prepare_split(split_generator, **prepare_split_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1842, in _prepare_split\r\n", + " for job_id, done, content in iflatmap_unordered(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in iflatmap_unordered\r\n", + " [async_result.get(timeout=0.05) for async_result in async_results]\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in \r\n", + " [async_result.get(timeout=0.05) for async_result in async_results]\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 774, in get\r\n", + " raise self._value\r\n", + "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... 
\u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: - 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \\ 0.005 MB of 0.016 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.005 MB of 0.016 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/9o2jwwvs\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v54\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230914_023759-9o2jwwvs/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the model finetuning\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n", + " --model.lr_init=4e-4 \\\n", + " --model.lr_final=2e-4 \\\n", + " --data.max_token_size=8192 \\\n", + " --data.sort_by_length=True \\\n", + " --model.ctx_len=4096 \\\n", + " --model.bptt_learning_range=2 \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + },
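+ { + "attachments": {}, + "cell_type": "markdown", + "id": "added-jsonl-scan-md", + "metadata": {}, + "source": [ + "The training run above aborted inside `load_dataset` with a `JSON parse error: Missing a comma or '}' after an object member. in row 27`, meaning one of the generated `.jsonl` files contains a malformed line. The next cell is an illustrative debugging aid added for clarity, not part of the original pipeline: it scans each generated file and reports the first line that fails to parse as JSON, which is enough to locate the offending record before retrying the trainer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "added-jsonl-scan", + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative debugging aid (not part of the original pipeline):\n", + "# report the first malformed line in each generated .jsonl file.\n", + "import glob\n", + "import json\n", + "import os\n", + "\n", + "# Same location the generation cell wrote to (it ran `cd ../` first).\n", + "dataset_dir = os.path.join(NOTEBOOK_DIR, \"../../dataset\")\n", + "for path in sorted(glob.glob(os.path.join(dataset_dir, \"*.jsonl\"))):\n", + "    with open(path) as f:\n", + "        for row, line in enumerate(f, start=1):\n", + "            try:\n", + "                json.loads(line)\n", + "            except json.JSONDecodeError as e:\n", + "                print(f\"{path} (line {row}): {e}\")\n", + "                break" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6db19b87", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:38:21.728358Z", + "iopub.status.busy": "2023-09-14T02:38:21.727768Z", + "iopub.status.idle": "2023-09-14T02:38:25.677206Z", + "shell.execute_reply": "2023-09-14T02:38:25.676074Z" + }, + "papermill": { + "duration": 3.986198, + "end_time": "2023-09-14T02:38:25.679730", + "exception": false, + "start_time": "2023-09-14T02:38:21.693532", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-14 02:38:24,136] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in <module>\r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File 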
\"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ec391cb3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:38:25.748202Z", + "iopub.status.busy": "2023-09-14T02:38:25.747247Z", + "iopub.status.idle": "2023-09-14T02:38:26.013713Z", + "shell.execute_reply": "2023-09-14T02:38:26.012573Z" + }, + "papermill": { + "duration": 0.303358, + "end_time": "2023-09-14T02:38:26.016073", + "exception": false, + "start_time": "2023-09-14T02:38:25.712715", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2748101d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T02:38:26.085118Z", + "iopub.status.busy": "2023-09-14T02:38:26.084169Z", + "iopub.status.idle": "2023-09-14T02:38:26.352535Z", + "shell.execute_reply": "2023-09-14T02:38:26.351341Z" + }, + "papermill": { + "duration": 0.305573, + "end_time": "2023-09-14T02:38:26.354898", + "exception": false, + "start_time": "2023-09-14T02:38:26.049325", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 83.790874, + "end_time": "2023-09-14T02:38:26.808961", + "environment_variables": {}, + "exception": null, + "input_path": 
"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb", + "parameters": {}, + "start_time": "2023-09-14T02:37:03.018087", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file