diff --git a/.gitattributes b/.gitattributes index b6d19782bcbc7fe236d0826bfe059c5aa9f6ba2a..3e6c57fc849ccd3aaccfca0b40699274402ee0d7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -86,22 +86,3 @@ experiment/memory-bench/logs/v5-L6-D1024-E0_1-4k.csv filter=lfs diff=lfs merge=l experiment/memory-bench/logs/v5-L6-D1024-E0_1-16k.csv filter=lfs diff=lfs merge=lfs -text experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb filter=lfs diff=lfs merge=lfs -text diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth deleted file mode 100644 index df9e649b821c97a2e5912e7f73555d6ed6cb4133..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c447bfd1844d0c3e536fb8824d029fd8b0e334e1368f807a4e85cd7099005130 -size 1721187285 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index 7e510b454e51f3e1360458d4aa4bc8f97480caba..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70b243f59685c4df841f16343bc7ff6947a3125cec5dabf9035b28b65c04da0e -size 1721187285 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth deleted file mode 100644 index af865daa0e29c34250db338e077056e58b7bba5e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a57c278ed7e7e2f9d7f0436540674bfa5178adcd04c3154f5d92992e0602c55b -size 1721187621 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth deleted file mode 100644 index 613ae00e6b94d2f54a518f1072636f21e570c85e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06105d96413046fce0ec189b9c4685a813cfa7147300851c5d2afc7b5adbcb38 -size 1721189797 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb deleted file mode 100644 index 04f75fe75d5a993aa7050629019cd0e3cf72c508..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:31f61ce42e82d9a475446458ed015a190f16dd9b2b17bd67f4feedd9f72750ad -size 16577145 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb deleted file mode 100644 index 2caac2060cc81dbfc7e4840004960eeed06d0e29..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b53c27ed2c20b9f1f690647a83c0fbe2ce09594518b9ec557f515a4f8b548f2b -size 15941299 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index 2f736f75a1664aecb04c1e0fe217b71a77aecbf5..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c380bcd4b861a8af263fd56dc6e183b9e06ba0bc8f9895c4dcd8a678b58296e8 -size 1721187621 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth deleted file mode 100644 index 7f96f25f0aac70ea5a9c88a5208d0c071bd9fee9..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89f8caf661887bdba1897a10009f033331c552bfb763112e6da1b850d8ec3ff7 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth deleted file mode 100644 index 15f17b01a2826359ce6ac3f3bea9b310b2b596e3..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2680e091197e798686c97bdd2af0f6827f2b29c648cc1ae03f67d6f094859618 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth deleted file mode 100644 index 8bb49e3d9132afcd95cecfa46932131d2971c1e2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35a5d7571d90160edc20ce95abfdbcb6109ad47eccdefe8051bd8f15d12bf326 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth deleted file mode 100644 index ac0f774b491b1af1dec3e65871dcb2618a295104..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6b50bf05f191da87a6a17072d485d4059a4ded1335605e6b7bb8e9f2648d966 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb deleted file mode 100644 index ae3b85f4a8acf6c3001f445ade22f015a8d52327..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb +++ /dev/null @@ -1,2461 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "d3126ef2", - "metadata": { - "papermill": { - "duration": 0.004879, - "end_time": "2023-10-11T08:02:23.608034", - "exception": false, - "start_time": "2023-10-11T08:02:23.603155", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# RWKV v5 multi-size training experiment\n", - "\n", - "**Note:** This project assumes you have the rwkv-infctx conda env setup" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "986070aa", - "metadata": { - "papermill": { - "duration": 0.002523, - "end_time": "2023-10-11T08:02:23.613605", - "exception": false, - "start_time": "2023-10-11T08:02:23.611082", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Basic Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "dc924c7f", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:23.620990Z", - "iopub.status.busy": "2023-10-11T08:02:23.620432Z", - "iopub.status.idle": "2023-10-11T08:02:24.379549Z", - "shell.execute_reply": "2023-10-11T08:02:24.378580Z" - }, - "papermill": { - "duration": 0.765369, - "end_time": "2023-10-11T08:02:24.381741", - "exception": false, - "start_time": "2023-10-11T08:02:23.616372", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# First lets setup the various directories, and init the model\n", - "!mkdir -p ../../../../model/\n", - "!mkdir -p ../../../../datapath/\n", - "!mkdir -p ../../../../checkpoint/" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2bbc32ac", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:24.389788Z", - "iopub.status.busy": "2023-10-11T08:02:24.389227Z", - "iopub.status.idle": "2023-10-11T08:02:24.398441Z", - "shell.execute_reply": "2023-10-11T08:02:24.397578Z" - }, - "papermill": { - "duration": 0.015548, - "end_time": "2023-10-11T08:02:24.400362", - "exception": false, - "start_time": "2023-10-11T08:02:24.384814", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DEEPSPEED_STRAT: deepspeed_stage_2_offload\n", - "ENABLE_WANDB: True\n", - "GPU_DEVICES: auto\n", - "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train\n", - "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" - ] - } - ], - "source": [ - "DEEPSPEED_STRAT=\"deepspeed_stage_2_offload\"\n", - "GPU_DEVICES=\"auto\"\n", - "ENABLE_WANDB=True\n", - "\n", - "EMBED_SCALE=0.01\n", - "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", - "\n", - "EMBED_SIZE=2048\n", - "\n", - "WANDB_PREFIX=f\"[Multi-size] v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE}\"\n", - "FILENAME_PREFIX=f\"v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE_LABEL}\"\n", - "\n", - "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", - "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", - "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", - "\n", - "if ENABLE_WANDB:\n", - " WANDB_MODE=\"online\"\n", - "else:\n", - " WANDB_MODE=\"disabled\"\n", - "\n", - "# Computing the notebook, and various paths\n", - "import os\n", - "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", - "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", - "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "\n", - "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", - "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", - "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", - "print(\"PROJECT_DIR:\", PROJECT_DIR)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ffa69634", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:24.408311Z", - "iopub.status.busy": "2023-10-11T08:02:24.407798Z", - "iopub.status.idle": "2023-10-11T08:03:19.634663Z", - "shell.execute_reply": "2023-10-11T08:03:19.633765Z" - }, - "papermill": { - "duration": 55.233419, - "end_time": "2023-10-11T08:03:19.636895", - "exception": false, - "start_time": "2023-10-11T08:02:24.403476", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-10-11 08:02:24-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 18.154.227.87, 18.154.227.7, 18.154.227.69, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|18.154.227.87|:443... connected.\r\n", - "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-10-11 08:02:24-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.69, 3.162.112.2, 3.162.112.100, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.69|:443... connected.\r\n", - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 OK\r\n", - "Length: 1066536657 (1017M) [binary/octet-stream]\r\n", - "Saving to: ‘v5-L6-D2048-E0_01-split-2a.pth’\r\n", - "\r\n", - "\r", - " v5-L6-D20 0%[ ] 0 --.-KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 1%[ ] 15.26M 42.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 3%[ ] 30.52M 47.7MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 4%[ ] 45.26M 51.5MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 5%[> ] 59.20M 52.1MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 6%[> ] 65.20M 48.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 7%[> ] 76.29M 44.4MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 8%[> ] 91.03M 47.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 9%[> ] 91.55M 43.0MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 10%[=> ] 106.81M 43.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 11%[=> ] 120.25M 43.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 41.4MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 13%[=> ] 136.81M 42.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.8MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 15%[==> ] 152.72M 40.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 16%[==> ] 167.85M 41.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 18%[==> ] 183.10M 43.0MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2a 19%[==> ] 198.36M 43.4MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2a. 20%[===> ] 213.11M 44.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2a.p 22%[===> ] 228.36M 43.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2a.pt 22%[===> ] 228.87M 41.1MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2a.pth 24%[===> ] 244.13M 41.0MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2a.pth 25%[====> ] 259.40M 42.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2a.pth 26%[====> ] 272.83M 40.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2a.pth 28%[====> ] 289.40M 41.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2a.pth 28%[====> ] 289.92M 37.9MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2a.pth 29%[====> ] 304.66M 36.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2a.pth 30%[=====> ] 305.18M 33.4MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2a.pth 31%[=====> ] 318.60M 33.2MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2a.pth 31%[=====> ] 320.29M 33.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2a.pth 31%[=====> ] 320.57M 30.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2a.pth 32%[=====> ] 335.18M 30.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2a.pth 33%[=====> ] 345.53M 31.2MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2a.pth 34%[=====> ] 350.82M 29.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2a.pth 35%[======> ] 360.98M 31.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "a.pth 36%[======> ] 366.20M 29.6MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 37%[======> ] 380.96M 30.8MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 38%[======> ] 392.79M 32.0MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 39%[======> ] 396.73M 29.1MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 40%[=======> ] 411.99M 29.1MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 41%[=======> ] 426.73M 28.7MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 42%[=======> ] 427.25M 29.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 42%[=======> ] 435.25M 27.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 43%[=======> ] 438.04M 28.2MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 43%[=======> ] 442.05M 29.7MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 43%[=======> ] 446.00M 31.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 44%[=======> ] 457.24M 33.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 45%[========> ] 457.89M 31.6MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 46%[========> ] 473.02M 34.8MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 48%[========> ] 488.28M 34.1MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 49%[========> ] 503.03M 34.6MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 50%[=========> ] 518.29M 37.3MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 51%[=========> ] 525.10M 35.8MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 52%[=========> ] 534.05M 34.4MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 53%[=========> ] 548.80M 34.4MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 55%[==========> ] 562.75M 33.8MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 56%[==========> ] 579.31M 36.0MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 57%[==========> ] 581.49M 36.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 58%[==========> ] 592.93M 37.4MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 58%[==========> ] 595.09M 37.1MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 60%[===========> ] 610.35M 38.5MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 61%[===========> ] 625.61M 38.7MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 62%[===========> ] 640.36M 39.9MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 64%[===========> ] 653.30M 39.5MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 64%[===========> ] 656.13M 38.5MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 66%[============> ] 671.38M 38.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2a 67%[============> ] 685.57M 39.7MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2a. 67%[============> ] 686.64M 37.5MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2a.p 68%[============> ] 701.39M 37.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2a.pt 69%[============> ] 708.59M 38.8MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2a.pth 70%[=============> ] 715.34M 38.2MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2a.pth 71%[=============> ] 731.91M 40.7MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2a.pth 73%[=============> ] 747.17M 38.0MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2a.pth 73%[=============> ] 747.75M 38.0MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2a.pth 74%[=============> ] 762.42M 40.2MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2a.pth 75%[==============> ] 762.94M 37.2MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2a.pth 76%[==============> ] 776.37M 36.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2a.pth 76%[==============> ] 778.20M 34.9MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2a.pth 77%[==============> ] 791.63M 38.1MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2a.pth 78%[==============> ] 793.46M 36.0MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2a.pth 79%[==============> ] 808.20M 38.6MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2a.pth 80%[===============> ] 816.07M 36.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2a.pth 81%[===============> ] 823.97M 34.7MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2a.pth 82%[===============> ] 837.41M 36.4MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "a.pth 83%[===============> ] 853.98M 38.3MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 85%[================> ] 867.67M 38.4MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 85%[================> ] 873.17M 39.1MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 87%[================> ] 885.01M 36.1MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 88%[================> ] 899.75M 37.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 88%[================> ] 900.40M 34.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 90%[=================> ] 915.53M 35.4MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 91%[=================> ] 930.78M 37.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 92%[=================> ] 945.53M 40.9MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 93%[=================> ] 946.04M 37.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 94%[=================> ] 959.48M 38.0MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 94%[=================> ] 961.30M 33.1MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 95%[==================> ] 976.05M 34.9MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 97%[==================> ] 991.31M 34.8MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 97%[==================> ] 992.94M 35.0MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 98%[==================> ] 1005M 34.7MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 99%[==================> ] 1016M 33.7MB/s eta 0s \r", - "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 33.9MB/s in 28s \r\n", - "\r\n", - "2023-10-11 08:02:52 (36.4 MB/s) - ‘v5-L6-D2048-E0_01-split-2a.pth’ saved [1066536657/1066536657]\r\n", - "\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-10-11 08:02:53-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.7, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n", - "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-10-11 08:02:53-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.95, 3.162.112.100, 3.162.112.2, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.95|:443... connected.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 OK\r\n", - "Length: 1066536657 (1017M) [binary/octet-stream]\r\n", - "Saving to: ‘v5-L6-D2048-E0_01-split-2b.pth’\r\n", - "\r\n", - "\r", - " v5-L6-D20 0%[ ] 0 --.-KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 1%[ ] 14.74M 67.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 2%[ ] 28.69M 63.1MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 3%[ ] 30.52M 42.3MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 4%[ ] 45.26M 45.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 4%[ ] 45.78M 37.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 6%[> ] 61.03M 41.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 7%[> ] 75.78M 45.0MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 8%[> ] 85.94M 45.6MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 9%[> ] 91.55M 40.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 10%[=> ] 106.81M 40.5MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 40.2MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 13%[=> ] 137.33M 41.7MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.9MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 16%[==> ] 167.33M 43.6MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 17%[==> ] 181.32M 44.9MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 18%[==> ] 183.10M 41.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2b 19%[==> ] 196.53M 41.8MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2b. 19%[==> ] 198.36M 39.0MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2b.p 20%[===> ] 213.11M 39.0MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2b.pt 21%[===> ] 220.29M 40.8MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2b.pth 22%[===> ] 228.36M 39.9MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2b.pth 24%[===> ] 244.13M 40.3MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2b.pth 25%[====> ] 259.40M 40.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2b.pth 26%[====> ] 274.14M 42.1MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2b.pth 27%[====> ] 274.66M 38.5MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2b.pth 28%[====> ] 289.92M 41.6MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2b.pth 30%[=====> ] 305.18M 41.6MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2b.pth 31%[=====> ] 320.43M 40.9MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2b.pth 32%[=====> ] 335.18M 41.2MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2b.pth 33%[=====> ] 335.69M 38.5MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2b.pth 34%[=====> ] 350.95M 38.6MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2b.pth 35%[======> ] 365.70M 40.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2b.pth 36%[======> ] 366.20M 38.0MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2b.pth 37%[======> ] 381.47M 38.8MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "b.pth 37%[======> ] 385.65M 39.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 39%[======> ] 396.73M 36.3MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 39%[======> ] 406.75M 37.5MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 40%[=======> ] 411.99M 33.0MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 42%[=======> ] 427.25M 33.6MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 43%[=======> ] 441.98M 32.7MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 43%[=======> ] 442.51M 32.5MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 44%[=======> ] 457.25M 32.2MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 45%[========> ] 457.76M 32.2MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 46%[========> ] 472.50M 31.5MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 46%[========> ] 473.02M 31.5MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 48%[========> ] 488.28M 30.9MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 49%[========> ] 503.54M 33.6MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 50%[=========> ] 518.29M 34.2MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 51%[=========> ] 518.80M 34.8MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 52%[=========> ] 534.05M 34.2MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 54%[=========> ] 549.31M 37.5MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 55%[==========> ] 564.06M 37.7MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 55%[==========> ] 565.78M 37.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 57%[==========> ] 579.83M 37.8MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 58%[==========> ] 595.09M 39.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 60%[===========> ] 610.35M 40.9MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 61%[===========> ] 625.47M 44.1MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 61%[===========> ] 629.82M 42.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 63%[===========> ] 640.87M 42.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 64%[===========> ] 656.13M 45.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 66%[============> ] 671.38M 45.8MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 67%[============> ] 686.64M 47.1MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 69%[============> ] 701.90M 47.0MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 70%[=============> ] 717.16M 46.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 71%[=============> ] 730.60M 47.8MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2b 73%[=============> ] 747.17M 45.9MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2b. 74%[=============> ] 755.98M 45.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2b.p 75%[==============> ] 762.94M 43.2MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2b.pt 76%[==============> ] 777.68M 45.4MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2b.pth 76%[==============> ] 778.32M 42.7MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2b.pth 78%[==============> ] 793.46M 42.0MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2b.pth 79%[==============> ] 808.20M 41.7MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2b.pth 80%[===============> ] 814.09M 42.3MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2b.pth 80%[===============> ] 823.46M 41.0MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2b.pth 81%[===============> ] 823.97M 40.5MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2b.pth 82%[===============> ] 838.71M 38.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2b.pth 83%[===============> ] 853.98M 41.4MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2b.pth 84%[===============> ] 854.61M 38.0MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2b.pth 85%[================> ] 869.24M 35.7MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2b.pth 85%[================> ] 869.75M 35.3MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2b.pth 86%[================> ] 875.74M 34.3MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2b.pth 87%[================> ] 885.01M 32.5MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2b.pth 88%[================> ] 900.27M 33.8MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "b.pth 89%[================> ] 913.70M 34.5MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 90%[=================> ] 924.21M 34.8MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 91%[=================> ] 930.27M 35.3MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 93%[=================> ] 946.04M 34.9MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 94%[=================> ] 961.30M 37.2MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 95%[==================> ] 970.14M 35.7MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 96%[==================> ] 976.55M 34.9MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 97%[==================> ] 991.82M 37.0MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 98%[==================> ] 998.13M 35.6MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 98%[==================> ] 1007M 37.2MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 99%[==================> ] 1016M 34.9MB/s eta 0s \r", - "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 35.1MB/s in 26s \r\n", - "\r\n", - "2023-10-11 08:03:19 (38.9 MB/s) - ‘v5-L6-D2048-E0_01-split-2b.pth’ saved [1066536657/1066536657]\r\n", - "\r\n" - ] - } - ], - "source": [ - "# Get the init split model, and finetune from there\n", - "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\"\n", - "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "2a3cd2d1", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:19.666619Z", - "iopub.status.busy": "2023-10-11T08:03:19.665958Z", - "iopub.status.idle": "2023-10-11T08:03:29.305787Z", - "shell.execute_reply": "2023-10-11T08:03:29.304873Z" - }, - "papermill": { - "duration": 9.658186, - "end_time": "2023-10-11T08:03:29.308744", - "exception": false, - "start_time": "2023-10-11T08:03:19.650558", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "Saving the dataset (0/2 shards): 0%| | 0/27200 [00:00\r\n", - " cli_main()\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n", - " LightningCLI(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", - " self.instantiate_classes()\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", - " self.config_init = self.parser.instantiate_classes(self.config)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", - " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", - " component.instantiate_class(component, cfg)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", - " parent[key] = group.group_class(**value)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2a.pth' does not exist\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/5696uouo\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080337-5696uouo/logs\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Start the foundation model training\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", - " python3 lightning_trainer.py fit \\\n", - " -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion A3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", - " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", - " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/\" \\\n", - " --model.load_model=\"../model/{FILENAME_PREFIX}-split-2a.pth\" \\\n", - " --model.ctx_len=4096 \\\n", - " --model.bptt_learning_range=1" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "53867c42", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:46.969471Z", - "iopub.status.busy": "2023-10-11T08:03:46.969019Z", - "iopub.status.idle": "2023-10-11T08:03:50.682437Z", - "shell.execute_reply": "2023-10-11T08:03:50.680986Z" - }, - "papermill": { - "duration": 3.732808, - "end_time": "2023-10-11T08:03:50.685581", - "exception": false, - "start_time": "2023-10-11T08:03:46.952773", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:03:49,278] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", - " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", - " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", - " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", - "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-a3/last.ckpt/latest\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth': No such file or directory\r\n" - ] - } - ], - "source": [ - "# Lets export the model from the checkpoint\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"bf16\"\n", - "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5688e577", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:50.806267Z", - "iopub.status.busy": "2023-10-11T08:03:50.804997Z", - "iopub.status.idle": "2023-10-11T08:03:56.788036Z", - "shell.execute_reply": "2023-10-11T08:03:56.786568Z" - }, - "papermill": { - "duration": 6.08675, - "end_time": "2023-10-11T08:03:56.790510", - "exception": false, - "start_time": "2023-10-11T08:03:50.703760", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:03:54,934] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in \r\n", - " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n", - " self.model = RWKV(**model_config)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth' does not exist\r\n" - ] - } - ], - "source": [ - "# # Lets do a quick dragon prompt validation\n", - "!cd \"{INFERENCE_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"cuda fp32\"" - ] - }, - { - "cell_type": "markdown", - "id": "b4927e87", - "metadata": { - "papermill": { - "duration": 0.015295, - "end_time": "2023-10-11T08:03:56.820640", - "exception": false, - "start_time": "2023-10-11T08:03:56.805345", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Enwiki Stage 3 : Split-Baseline-B training" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6bdd285a", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:56.853495Z", - "iopub.status.busy": "2023-10-11T08:03:56.852946Z", - "iopub.status.idle": "2023-10-11T08:04:11.500794Z", - "shell.execute_reply": "2023-10-11T08:04:11.499336Z" - }, - "papermill": { - "duration": 14.668001, - "end_time": "2023-10-11T08:04:11.503644", - "exception": false, - "start_time": "2023-10-11T08:03:56.835643", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:01,096] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'].\r\n", - " rank_zero_warn(\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1732922148\r\n", - " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", - "Global seed set to 1732922148\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 278, in \r\n", - " cli_main()\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n", - " LightningCLI(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", - " self.instantiate_classes()\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", - " self.config_init = self.parser.instantiate_classes(self.config)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", - " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", - " component.instantiate_class(component, cfg)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", - " parent[key] = group.group_class(**value)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2b.pth' does not exist\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j/logs\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Start the foundation model training\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", - " python3 lightning_trainer.py fit \\\n", - " -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion B3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", - " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", - " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/\" \\\n", - " --model.load_model=\"../model/{FILENAME_PREFIX}-split-2b.pth\" \\\n", - " --model.ctx_len=4096 \\\n", - " --model.bptt_learning_range=1" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ae4623a1", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:04:11.546046Z", - "iopub.status.busy": "2023-10-11T08:04:11.544870Z", - "iopub.status.idle": "2023-10-11T08:04:15.274349Z", - "shell.execute_reply": "2023-10-11T08:04:15.272957Z" - }, - "papermill": { - "duration": 3.754115, - "end_time": "2023-10-11T08:04:15.277163", - "exception": false, - "start_time": "2023-10-11T08:04:11.523048", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:13,869] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", - " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", - " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", - " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", - "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/last.ckpt/latest\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth': No such file or directory\r\n" - ] - } - ], - "source": [ - "# Lets export the model from the checkpoint\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"bf16\"\n", - "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "8e1b1152", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:04:15.319747Z", - "iopub.status.busy": "2023-10-11T08:04:15.318636Z", - "iopub.status.idle": "2023-10-11T08:04:21.268526Z", - "shell.execute_reply": "2023-10-11T08:04:21.267073Z" - }, - "papermill": { - "duration": 5.974644, - "end_time": "2023-10-11T08:04:21.271495", - "exception": false, - "start_time": "2023-10-11T08:04:15.296851", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:19,430] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n", - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in \r\n", - " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n", - " self.model = RWKV(**model_config)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth' does not exist\r\n" - ] - } - ], - "source": [ - "# # Lets do a quick dragon prompt validation\n", - "!cd \"{INFERENCE_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"cuda fp32\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "papermill": { - "default_parameters": {}, - "duration": 119.315066, - "end_time": "2023-10-11T08:04:21.714050", - "environment_variables": {}, - "exception": null, - "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb", - "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb", - "parameters": {}, - "start_time": "2023-10-11T08:02:22.398984", - "version": "2.4.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb deleted file mode 100644 index 31a5b8eeab80f4fb0b5a736155d2fd141fa7fd54..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0aa2c37ab25e53ed3e45a9e7b5b09d1ac2d2f627412df5c98cc1f113838d800 -size 15734950 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb deleted file mode 100644 index 9810fb95056168b6f333635a6ad59587d31b6e23..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d643e2a64a0f7323eb7b14b90ce5a0e5457818349c75e666dbf52b7319f5de72 -size 15733849 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth deleted file mode 100644 index b42c1d46426286791c4b684a05f90055dccae4d1..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:849b57b4d493d40313ef04b30ffc22ec6f5cb99e05225615ee0cb00acb78a95d -size 1066537077 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index e671afa6d1c25ea33703bbbdf389a33493910501..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8413565273ef40f61db246dcbf793e045b39d1163e18885441be5a16d733f34c -size 1066537077 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth deleted file mode 100644 index b5857b83e411d72861863eda5c9c32a7132e1bfe..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:235d88b0aa939596392f2b5734a426940535816aa13106498974a809051a4c75 -size 1066537217 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth deleted file mode 100644 index e145614e20e99af77e84454e6ef16a39a61c1d9f..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1afd8d92632792f498805ac222d159524badf4ecbcaaae597060b6bb87a53110 -size 1066538057 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth deleted file mode 100644 index 381a48603dc68a10750a4b7d78e79594e6bde52d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e61d8f8901d1eb50759f0242e2886678ed24b9931295a270b14120ba74cb5c3 -size 1066538057 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth deleted file mode 100644 index ded0f392eb463040cbb0e4a66326c5ae08bcbda6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2d60ede71bc384ee4eff0a591b3fa57dd670c27e5e8ce5eadf25a7f0d7e226d -size 1066538337 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth deleted file mode 100644 index c498833cf2e305eacbd6ebd9485e9a5d6706eca2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea -size 1066536657 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth deleted file mode 100644 index b1bfb4e806da5dde645c9feb2acb0b0140ce43c6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25 -size 1066536657 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth deleted file mode 100644 index bfe873e0bdd09173577c50c9f6f3634155ade0ce..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f10f8f00c42b6408db81a3b26d53411c41edc7f23f5097ac095ad3096d6c5dc1 -size 1066537497 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth deleted file mode 100644 index f2aa96bd9b7f4e604e397947323f5156ee2fa129..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f07a8414cd0cd1c3df705dff8a0f2142231171ee52a94d12c55dfe7c888fef7 -size 1066537497 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb deleted file mode 100644 index e04ec817954792ce45a871de0ebed229db957ffd..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fccffc430231ad06fdb02a7e50ea57acfbeae3c42a97b018f62f937d30736e4 -size 16519239 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb deleted file mode 100644 index 955412d6f333912148d0dc1023c32ce58509ccd2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7260b3fe80de461d6dc923b21af87361f71e26a4a7191d51dd9665403728ddfa -size 15732960 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb deleted file mode 100644 index 5c78f23bf3d5e33fcb6836c803015836a2da0149..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f95adf89d498a4dd58af22ba192b2fd4d08ceec250784c7e9f6f9b8de0fed2bc -size 15855123 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb deleted file mode 100644 index 50ba5f3c8e80bcfb1a8005406d9e4f78979d8dac..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c308e5ae9f8fde5fd24cafccf60917dca9c97fc2e0a5fbcfa01027d6d50e927d -size 16623766 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb index 08a47ab9cc56551d38f353b7abad52bfde0da722..e6f4cfe2a9763734c0025be65b4a39e93e9ecb3f 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb +++ b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb @@ -1,3 +1,140490 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0db5673fbf59261a65cafb957510a87538f738b6cebffd10ed532db38dfdcb01 -size 53132732 +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "3174f701", + "metadata": { + "papermill": { + "duration": 0.005065, + "end_time": "2023-09-06T17:12:47.606560", + "exception": false, + "start_time": "2023-09-06T17:12:47.601495", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "cb5debdd", + "metadata": { + "papermill": { + "duration": 0.002115, + "end_time": "2023-09-06T17:12:47.612606", + "exception": false, + "start_time": "2023-09-06T17:12:47.610491", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "41bbf98d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:47.618377Z", + "iopub.status.busy": "2023-09-06T17:12:47.618157Z", + "iopub.status.idle": "2023-09-06T17:12:48.494513Z", + "shell.execute_reply": "2023-09-06T17:12:48.493600Z" + }, + "papermill": { + "duration": 0.881639, + "end_time": "2023-09-06T17:12:48.496472", + "exception": false, + "start_time": "2023-09-06T17:12:47.614833", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bc308e46", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:48.506904Z", + "iopub.status.busy": "2023-09-06T17:12:48.506660Z", + "iopub.status.idle": "2023-09-06T17:12:50.610312Z", + "shell.execute_reply": "2023-09-06T17:12:50.609442Z" + }, + "papermill": { + "duration": 2.110898, + "end_time": "2023-09-06T17:12:50.612132", + "exception": false, + "start_time": "2023-09-06T17:12:48.501234", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5ecce62b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:50.622765Z", + "iopub.status.busy": "2023-09-06T17:12:50.622510Z", + "iopub.status.idle": "2023-09-06T17:12:50.631551Z", + "shell.execute_reply": "2023-09-06T17:12:50.630955Z" + }, + "papermill": { + "duration": 0.01615, + "end_time": "2023-09-06T17:12:50.633066", + "exception": false, + "start_time": "2023-09-06T17:12:50.616916", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L12-D2048-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=12\n", + "EMBED_DIM=2048\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ecee273d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:50.643057Z", + "iopub.status.busy": "2023-09-06T17:12:50.642933Z", + "iopub.status.idle": "2023-09-06T17:13:06.898900Z", + "shell.execute_reply": "2023-09-06T17:13:06.898162Z" + }, + "papermill": { + "duration": 16.262552, + "end_time": "2023-09-06T17:13:06.900660", + "exception": false, + "start_time": "2023-09-06T17:12:50.638108", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-06 17:12:50-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 13.33.33.55, 13.33.33.102, 13.33.33.110, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.55|:443... connected.\r\n", + "HTTP request sent, awaiting response... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279570&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTU3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=vlJUg9zPT-Ie2MebXI3t7Sfovkvk30xvNya0WqXvAogwISiGWpmGNd3IKa0rDNdEdrQ3uREbJSFhcam12E5VepvwzlhCsUFsI4W9YnOQ8JOVAtNH5fzk16zGizK7%7EtmvJszRMbwukNZOp6TGz4kqEQPgwAwv26tPs9mP2ATP59hiH30jVnK1yjYot7Y2UAC6vKBdF3%7E%7EZUsL-ZfcYL0lTLE7xPmtgafMs3DM-TJhA1wPXw2r-ByBDo2l6edDKcosW36ncjch5kT5XXrnmxEhX4Yll0kAYuwvfXZI2AsIfeopfeKyYhg0KKeAwrPaxHzAcfQSHQn%7EVIjtW-Ro-8XAUw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-06 17:12:51-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279570&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTU3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=vlJUg9zPT-Ie2MebXI3t7Sfovkvk30xvNya0WqXvAogwISiGWpmGNd3IKa0rDNdEdrQ3uREbJSFhcam12E5VepvwzlhCsUFsI4W9YnOQ8JOVAtNH5fzk16zGizK7%7EtmvJszRMbwukNZOp6TGz4kqEQPgwAwv26tPs9mP2ATP59hiH30jVnK1yjYot7Y2UAC6vKBdF3%7E%7EZUsL-ZfcYL0lTLE7xPmtgafMs3DM-TJhA1wPXw2r-ByBDo2l6edDKcosW36ncjch5kT5XXrnmxEhX4Yll0kAYuwvfXZI2AsIfeopfeKyYhg0KKeAwrPaxHzAcfQSHQn%7EVIjtW-Ro-8XAUw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18.155.68.98, 18.155.68.128, 18.155.68.94, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.98|:443... connected.\r\n", + "HTTP request sent, awaiting response... 200 OK\r\n", + "Length: 1721187013 (1.6G) [binary/octet-stream]\r\n", + "Saving to: ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’\r\n", + "\r\n", + "\r", + " v5r3-L12- 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 1%[ ] 21.42M 107MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 2%[ ] 43.83M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 4%[ ] 66.17M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 5%[> ] 88.57M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 6%[> ] 110.92M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 8%[> ] 133.36M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 9%[> ] 155.76M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 10%[=> ] 178.16M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 12%[=> ] 200.56M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 13%[=> ] 222.97M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 14%[=> ] 244.89M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 16%[==> ] 267.32M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 17%[==> ] 289.71M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 19%[==> ] 312.11M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 20%[===> ] 334.51M 111MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 21%[===> ] 356.91M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 23%[===> ] 379.31M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 24%[===> ] 401.72M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 25%[====> ] 424.13M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 27%[====> ] 446.53M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 28%[====> ] 468.94M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 29%[====> ] 491.34M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 31%[=====> ] 513.75M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 32%[=====> ] 536.15M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-enwiki-4k.pth 34%[=====> ] 558.51M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-enwiki-4k.pth 35%[======> ] 580.91M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-enwiki-4k.pth 36%[======> ] 603.30M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-enwiki-4k.pth 38%[======> ] 625.71M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-enwiki-4k.pth 39%[======> ] 648.10M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "enwiki-4k.pth 40%[=======> ] 670.50M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "nwiki-4k.pth 42%[=======> ] 692.78M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "wiki-4k.pth 43%[=======> ] 715.16M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "iki-4k.pth 44%[=======> ] 737.57M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ki-4k.pth 46%[========> ] 759.99M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "i-4k.pth 47%[========> ] 782.39M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-4k.pth 49%[========> ] 804.80M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4k.pth 50%[=========> ] 827.18M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 51%[=========> ] 849.60M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 53%[=========> ] 872.00M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 54%[=========> ] 894.41M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 55%[==========> ] 916.82M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 57%[==========> ] 939.22M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 58%[==========> ] 961.63M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 59%[==========> ] 984.03M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 61%[===========> ] 1006M 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 62%[===========> ] 1.00G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 64%[===========> ] 1.03G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 65%[============> ] 1.05G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 66%[============> ] 1.07G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L1 68%[============> ] 1.09G 112MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12 68%[============> ] 1.09G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12- 69%[============> ] 1.11G 103MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 70%[=============> ] 1.13G 101MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 71%[=============> ] 1.14G 98.7MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 72%[=============> ] 1.16G 96.7MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 73%[=============> ] 1.17G 93.8MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 74%[=============> ] 1.19G 93.4MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 75%[==============> ] 1.21G 91.2MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 76%[==============> ] 1.22G 89.5MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 77%[==============> ] 1.24G 89.3MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 78%[==============> ] 1.26G 85.2MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 79%[==============> ] 1.28G 85.1MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 81%[===============> ] 1.30G 85.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 82%[===============> ] 1.32G 84.6MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 83%[===============> ] 1.34G 85.0MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 85%[================> ] 1.37G 86.9MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 86%[================> ] 1.39G 92.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 87%[================> ] 1.41G 94.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 89%[================> ] 1.43G 96.2MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 90%[=================> ] 1.45G 99.5MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 92%[=================> ] 1.47G 101MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 93%[=================> ] 1.50G 104MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 94%[=================> ] 1.52G 105MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 96%[==================> ] 1.54G 107MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 97%[==================> ] 1.56G 107MB/s eta 0s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 98%[==================> ] 1.58G 111MB/s eta 0s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 100%[===================>] 1.60G 112MB/s in 15s \r\n", + "\r\n", + "2023-09-06 17:13:06 (106 MB/s) - ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’ saved [1721187013/1721187013]\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1.5G\r\n", + "drwxr-xr-x 2 root root 3 Sep 6 17:12 .\r\n", + "drwxr-xr-x 20 root root 24 Sep 6 17:12 ..\r\n", + "-rw-r--r-- 1 root root 1.7G Sep 6 15:04 v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n" + ] + } + ], + "source": [ + "# Download the model directly (stop gap till HF sync issues is resolved)\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-enwiki-4k.pth\"\n", + "\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " ls -alh ." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "326602ab", + "metadata": { + "papermill": { + "duration": 0.005225, + "end_time": "2023-09-06T17:13:06.914108", + "exception": false, + "start_time": "2023-09-06T17:13:06.908883", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Enwiki Stage 2 : Basic Instruct Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e3aa35e9", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:13:06.925901Z", + "iopub.status.busy": "2023-09-06T17:13:06.925655Z", + "iopub.status.idle": "2023-09-06T17:13:14.291842Z", + "shell.execute_reply": "2023-09-06T17:13:14.291053Z" + }, + "papermill": { + "duration": 7.374402, + "end_time": "2023-09-06T17:13:14.293884", + "exception": false, + "start_time": "2023-09-06T17:13:06.919482", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Saving the dataset (0/1 shards): 0%| | 0/14932 [00:00=12.1), as this is known to have freeze issues\r\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", + "#\r\n", + "\r\n", + "[RWKV.model] Configuring optimizer with\r\n", + " - lr_init: 4.000e-04 (0.0004)\r\n", + " - lr_final: 3.000e-04 (0.0003)\r\n", + "\r\n", + "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Detected CUDA files, patching ldflags\r\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\r\n", + "Building extension module fused_adam...\r\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n", + "ninja: no work to do.\r\n", + "Loading extension module fused_adam...\r\n", + "Time to load fused_adam op: 0.06091904640197754 seconds\r\n", + "Loading `train_dataloader` to estimate number of stepping batches.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rank: 0 partition count [1, 1] and sizes[(860549120, False), (768, False)] \r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + " | Name | Type | Params\r\n", + "--------------------------------------\r\n", + "0 | emb | Embedding | 102 M \r\n", + "1 | blocks | ModuleList | 654 M \r\n", + "2 | ln_out | LayerNorm | 4.1 K \r\n", + "3 | head | Linear | 102 M \r\n", + "--------------------------------------\r\n", + "860 M Trainable params\r\n", + "0 Non-trainable params\r\n", + "860 M Total params\r\n", + "3,442.200 Total estimated model params size (MB)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Training: 0it [00:00, ?it/s]\r", + "Training: 0%| | 0/14932 [00:00\r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 542, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2112, in load_dataset\r\n", + " builder_instance = load_dataset_builder(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1798, in load_dataset_builder\r\n", + " dataset_module = dataset_module_factory(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1413, in dataset_module_factory\r\n", + " ).get_module()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 948, in get_module\r\n", + " patterns = sanitize_patterns(self.data_files) if self.data_files is not None else get_data_patterns(base_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/data_files.py\", line 459, in get_data_patterns\r\n", + " raise EmptyDatasetError(f\"The directory at {base_path} doesn't contain any data files\") from None\r\n", + "datasets.data_files.EmptyDatasetError: The directory at /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset doesn't contain any data files\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/jm2b2y5r\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v24\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_173654-jm2b2y5r/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-instruct.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Instruct (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-instruct/\" \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-enwiki-instruct.pth\" \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1aa1f08c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:15.869868Z", + "iopub.status.busy": "2023-09-06T17:37:15.869574Z", + "iopub.status.idle": "2023-09-06T17:37:18.336322Z", + "shell.execute_reply": "2023-09-06T17:37:18.335570Z" + }, + "papermill": { + "duration": 3.092432, + "end_time": "2023-09-06T17:37:18.338136", + "exception": false, + "start_time": "2023-09-06T17:37:15.245704", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:37:17,473] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-instruct/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-instruct/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-instruct.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "07518561", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:19.555489Z", + "iopub.status.busy": "2023-09-06T17:37:19.555305Z", + "iopub.status.idle": "2023-09-06T17:37:19.787874Z", + "shell.execute_reply": "2023-09-06T17:37:19.787184Z" + }, + "papermill": { + "duration": 0.823967, + "end_time": "2023-09-06T17:37:19.789695", + "exception": false, + "start_time": "2023-09-06T17:37:18.965728", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "markdown", + "id": "42fec908", + "metadata": { + "papermill": { + "duration": 0.633567, + "end_time": "2023-09-06T17:37:21.058555", + "exception": false, + "start_time": "2023-09-06T17:37:20.424988", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Tune 2 : Low ctx size (512), memory training\n", + "\n", + "- Tune 2: Low ctx size (512), Training with instruction & input masked. This forces the actual memory training on the output tokens." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "577fea20", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:22.293582Z", + "iopub.status.busy": "2023-09-06T17:37:22.293253Z", + "iopub.status.idle": "2023-09-06T17:37:22.339491Z", + "shell.execute_reply": "2023-09-06T17:37:22.339111Z" + }, + "papermill": { + "duration": 0.679609, + "end_time": "2023-09-06T17:37:22.341670", + "exception": false, + "start_time": "2023-09-06T17:37:21.662061", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 10K\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 2 root root 2 Sep 6 17:36 .\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 6 root root 11 Sep 6 17:36 ..\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ../dataset\n", + "rm -rf ../dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# We switch over to fully masked instruct+input, to properly learn the memorization task\n", + "#\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 5000 &\n", + "for i in {5..95..5} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 5000 & \n", + "done\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-100-count.jsonl 100 5000 &\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-200-count.jsonl 200 5000 &\n", + "\n", + "#\n", + "# We mixin the shuffled word list, so that we ensure all words / tokens are learned\n", + "# however this might intrduce an exclusion bias (if seen this word, never repeat it), \n", + "# so we limit the mixture of this data samples\n", + "#\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-10-count.jsonl 10 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-15-count.jsonl 15 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-25-count.jsonl 25 30 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-50-count.jsonl 50 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-75-count.jsonl 75 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-100-count.jsonl 100 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-200-count.jsonl 200 50 &\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ../dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5928163b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:23.574211Z", + "iopub.status.busy": "2023-09-06T17:37:23.573930Z", + "iopub.status.idle": "2023-09-06T17:37:41.229665Z", + "shell.execute_reply": "2023-09-06T17:37:41.228859Z" + }, + "papermill": { + "duration": 18.288664, + "end_time": "2023-09-06T17:37:41.231690", + "exception": false, + "start_time": "2023-09-06T17:37:22.943026", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:37:26,449] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 4258540337\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 4258540337\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230906_173728-uklb27ld\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/uklb27ld\u001b[0m\r\n", + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", + " self.instantiate_classes()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", + " self.config_init = self.parser.instantiate_classes(self.config)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", + " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", + " component.instantiate_class(component, cfg)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", + " parent[key] = group.group_class(**value)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 559, in __init__\r\n", + " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", + "ValueError: load_model file '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth' does not exist\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/uklb27ld\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v25\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_173728-uklb27ld/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-512 (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/\" \\\n", + " --model.lr_init=5e-4 \\\n", + " --model.lr_final=4e-4 \\\n", + " --data.max_token_size=512 \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1 \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3c4e1a84", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:42.501242Z", + "iopub.status.busy": "2023-09-06T17:37:42.500756Z", + "iopub.status.idle": "2023-09-06T17:37:44.926179Z", + "shell.execute_reply": "2023-09-06T17:37:44.925416Z" + }, + "papermill": { + "duration": 3.062059, + "end_time": "2023-09-06T17:37:44.927885", + "exception": false, + "start_time": "2023-09-06T17:37:41.865826", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:37:44,062] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-ctx-512.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ff1e2d52", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:46.169833Z", + "iopub.status.busy": "2023-09-06T17:37:46.169341Z", + "iopub.status.idle": "2023-09-06T17:37:46.403204Z", + "shell.execute_reply": "2023-09-06T17:37:46.402355Z" + }, + "papermill": { + "duration": 0.870871, + "end_time": "2023-09-06T17:37:46.404923", + "exception": false, + "start_time": "2023-09-06T17:37:45.534052", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 1500.445098, + "end_time": "2023-09-06T17:37:47.161793", + "environment_variables": {}, + "exception": null, + "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "parameters": {}, + "start_time": "2023-09-06T17:12:46.716695", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth index 5d66a986e13f105e9d1ef1b349003fdf4bd0f4eb..42e0286bc0b229cbc49e0036efc88ed43b44e67e 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf36cb2931ec033f98652643fa297578e6492fdde20223c63fc118f315ec34da +oid sha256:48e76cb4f838aac276f7eaaa0eb325338060592ab995b8cbf2bb0e0b44170c2e size 1721188709 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-ctx-512.pth b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-ctx-512.pth deleted file mode 100644 index 92c2877599b885c7d94dd39bba81674afa587188..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-ctx-512.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:428165f24369bec63746c64c9da6816dc26c17c18a80be1fff372da78294329c -size 1721187621 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-instruct.pth deleted file mode 100644 index 0056aaaf281f52fecf535c5252960e0e5f123453..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-instruct.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:106e6576d2bbb4c31571bf79e574b3fb8233217a6e8a8739b4d8d75de77e73a1 -size 1721187893 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb index 14ee9dd00dac7940061b9d8cc3baea15d28e8b03..7813b2792bd419cca4a8009d8e0283f3454e6922 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1ad9f654a7f6ed10dcf0b15fce28a0b191ce8f52bffde64090163e063ac19ab -size 24177949 +oid sha256:562fdf69a4267c753b425812f407e66acd1a0e0bd0dc29eef451d5f9af9193d1 +size 24740731 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb index f6082fe04f129bbb2d9ad5e7387ec8fd7a2a7d39..1486b975a9aa3f38dc6c130d16932310e66260e0 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb @@ -1,3 +1,141911 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:12f10826a59669d9dcf90268a0377596a642007f5642c452e87f76612a62939d -size 52757309 +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "718f1c9a", + "metadata": { + "papermill": { + "duration": 0.005477, + "end_time": "2023-09-06T17:24:46.367615", + "exception": false, + "start_time": "2023-09-06T17:24:46.362138", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "3237126c", + "metadata": { + "papermill": { + "duration": 0.004583, + "end_time": "2023-09-06T17:24:46.377203", + "exception": false, + "start_time": "2023-09-06T17:24:46.372620", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3b172c13", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:24:46.388594Z", + "iopub.status.busy": "2023-09-06T17:24:46.388114Z", + "iopub.status.idle": "2023-09-06T17:24:47.401851Z", + "shell.execute_reply": "2023-09-06T17:24:47.400352Z" + }, + "papermill": { + "duration": 1.022435, + "end_time": "2023-09-06T17:24:47.404551", + "exception": false, + "start_time": "2023-09-06T17:24:46.382116", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6a5b2ca0", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:24:47.418082Z", + "iopub.status.busy": "2023-09-06T17:24:47.416819Z", + "iopub.status.idle": "2023-09-06T17:24:50.719845Z", + "shell.execute_reply": "2023-09-06T17:24:50.718258Z" + }, + "papermill": { + "duration": 3.312823, + "end_time": "2023-09-06T17:24:50.722482", + "exception": false, + "start_time": "2023-09-06T17:24:47.409659", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c7c5f2ce", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:24:50.736379Z", + "iopub.status.busy": "2023-09-06T17:24:50.735150Z", + "iopub.status.idle": "2023-09-06T17:24:50.748734Z", + "shell.execute_reply": "2023-09-06T17:24:50.747177Z" + }, + "papermill": { + "duration": 0.023207, + "end_time": "2023-09-06T17:24:50.750984", + "exception": false, + "start_time": "2023-09-06T17:24:50.727777", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L6-D2048-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=6\n", + "EMBED_DIM=2048\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "23272857", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:24:50.764946Z", + "iopub.status.busy": "2023-09-06T17:24:50.764040Z", + "iopub.status.idle": "2023-09-06T17:25:12.804705Z", + "shell.execute_reply": "2023-09-06T17:25:12.803903Z" + }, + "papermill": { + "duration": 22.05082, + "end_time": "2023-09-06T17:25:12.807245", + "exception": false, + "start_time": "2023-09-06T17:24:50.756425", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-06 17:24:50-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.87, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n", + "HTTP request sent, awaiting response... 302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694280290&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MDI5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzdlYjdhYmZkYTJlNGNmYjJhOTYxYmE0ZDUyNTY0ZjliMzMwODMwYmExYTgzNjk2NjU1NmUyODc1MzQ2OGVhMWU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=l0Mwep-i6GNV-J%7EepezD7A17T72n6mA%7ENVRke24jJ9%7E2CDFf-7C7BUXFmpr2PCyka%7EO123V-aSM9kVMGZj6QIErLtWvw%7ER6iQmC9OFwIRUHp3HyFg-ZkMVj-b97ycZB2mCm3DPehloQrbgQkQcZqzyKTY5kK34eUVuSFcD%7EyM8V7vCuFr5fzKzGw87ji5hdxrxJJ5JbLMqcbtq-dlHHgzDtDI5bFsES5DOVLV0Lk02gg2fU-KxeCXDMPU3MTSuaUky2kQQgy4r2%7ENv20mFp5lSIuedQ2-kCzA8A%7EY50E9EP5qpkWGRBOE7Q52xZVZfwZ6GgXmiz0hw1a1XW0W27C5A__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-06 17:24:50-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694280290&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MDI5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzdlYjdhYmZkYTJlNGNmYjJhOTYxYmE0ZDUyNTY0ZjliMzMwODMwYmExYTgzNjk2NjU1NmUyODc1MzQ2OGVhMWU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=l0Mwep-i6GNV-J%7EepezD7A17T72n6mA%7ENVRke24jJ9%7E2CDFf-7C7BUXFmpr2PCyka%7EO123V-aSM9kVMGZj6QIErLtWvw%7ER6iQmC9OFwIRUHp3HyFg-ZkMVj-b97ycZB2mCm3DPehloQrbgQkQcZqzyKTY5kK34eUVuSFcD%7EyM8V7vCuFr5fzKzGw87ji5hdxrxJJ5JbLMqcbtq-dlHHgzDtDI5bFsES5DOVLV0Lk02gg2fU-KxeCXDMPU3MTSuaUky2kQQgy4r2%7ENv20mFp5lSIuedQ2-kCzA8A%7EY50E9EP5qpkWGRBOE7Q52xZVZfwZ6GgXmiz0hw1a1XW0W27C5A__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 108.138.64.121, 108.138.64.49, 108.138.64.111, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.138.64.121|:443... connected.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HTTP request sent, awaiting response... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "200 OK\r\n", + "Length: 1066536937 (1017M) [binary/octet-stream]\r\n", + "Saving to: ‘v5r3-L6-D2048-E0_1-enwiki-4k.pth’\r\n", + "\r\n", + "\r", + " v5r3-L6-D 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 1%[ ] 14.74M 60.4MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D20 1%[ ] 15.39M 34.6MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D204 3%[ ] 30.52M 37.4MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048 4%[ ] 45.78M 38.8MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048- 6%[> ] 61.03M 41.4MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E 7%[> ] 76.29M 45.5MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0 8%[> ] 91.20M 48.6MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_ 10%[=> ] 106.29M 49.2MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_1 11%[=> ] 113.73M 48.2MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2048-E0_1- 12%[=> ] 122.07M 46.1MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2048-E0_1-e 13%[=> ] 139.89M 49.1MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2048-E0_1-en 15%[==> ] 152.59M 48.2MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2048-E0_1-enw 16%[==> ] 167.33M 48.8MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2048-E0_1-enwi 16%[==> ] 172.31M 47.5MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2048-E0_1-enwik 18%[==> ] 183.10M 47.5MB/s eta 18s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2048-E0_1-enwiki 19%[==> ] 198.36M 47.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 20%[===> ] 210.46M 48.9MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 21%[===> ] 215.68M 46.7MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 23%[===> ] 243.63M 51.0MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 24%[===> ] 244.26M 49.1MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 25%[====> ] 259.40M 48.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 27%[====> ] 274.66M 50.2MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 28%[====> ] 289.92M 50.1MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-enwiki-4k.pth 30%[=====> ] 305.18M 49.8MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-enwiki-4k.pth 31%[=====> ] 322.50M 52.4MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-enwiki-4k.pth 32%[=====> ] 335.18M 52.3MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-enwiki-4k.pth 34%[=====> ] 350.44M 47.2MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-enwiki-4k.pth 35%[======> ] 364.38M 47.1MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "enwiki-4k.pth 37%[======> ] 380.15M 50.5MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "nwiki-4k.pth 38%[======> ] 392.91M 50.5MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "wiki-4k.pth 39%[======> ] 396.73M 47.0MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "iki-4k.pth 40%[=======> ] 411.47M 47.9MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ki-4k.pth 41%[=======> ] 426.73M 50.0MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "i-4k.pth 43%[=======> ] 440.68M 46.6MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-4k.pth 44%[=======> ] 455.93M 51.3MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4k.pth 45%[========> ] 457.76M 47.6MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 46%[========> ] 472.50M 46.5MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 46%[========> ] 475.08M 45.9MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 47%[========> ] 487.77M 44.0MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 49%[========> ] 503.54M 43.1MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 51%[=========> ] 518.80M 44.5MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 52%[=========> ] 534.05M 47.1MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 54%[=========> ] 549.31M 49.2MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 55%[==========> ] 564.58M 51.1MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 57%[==========> ] 579.83M 49.1MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 58%[==========> ] 595.09M 50.9MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 59%[==========> ] 608.93M 52.9MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 61%[===========> ] 623.04M 52.4MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 61%[===========> ] 625.61M 51.2MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 62%[===========> ] 640.36M 49.9MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 63%[===========> ] 640.87M 50.0MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 64%[===========> ] 655.62M 50.5MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D20 65%[============> ] 661.31M 51.0MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D204 66%[============> ] 671.38M 51.0MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048 67%[============> ] 686.64M 52.9MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048- 68%[============> ] 694.35M 51.9MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E 68%[============> ] 699.39M 49.5MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0 69%[============> ] 703.65M 46.0MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_ 70%[=============> ] 716.64M 44.8MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048-E0_1 71%[=============> ] 730.72M 47.1MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2048-E0_1- 72%[=============> ] 732.42M 43.0MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2048-E0_1-e 72%[=============> ] 736.61M 41.3MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2048-E0_1-en 73%[=============> ] 747.17M 40.4MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2048-E0_1-enw 75%[==============> ] 762.94M 41.6MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2048-E0_1-enwi 76%[==============> ] 774.56M 40.6MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2048-E0_1-enwik 77%[==============> ] 789.24M 44.0MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2048-E0_1-enwiki 78%[==============> ] 793.46M 41.0MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 79%[==============> ] 808.71M 44.1MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 80%[===============> ] 823.46M 43.7MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 82%[===============> ] 838.71M 46.8MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 83%[===============> ] 849.35M 45.0MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 84%[===============> ] 854.49M 42.2MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 85%[================> ] 869.75M 43.5MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 86%[================> ] 884.49M 45.5MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-enwiki-4k.pth 88%[================> ] 896.90M 47.7MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-enwiki-4k.pth 88%[================> ] 900.27M 46.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-enwiki-4k.pth 89%[================> ] 915.01M 48.9MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-enwiki-4k.pth 90%[=================> ] 918.15M 44.7MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-enwiki-4k.pth 91%[=================> ] 930.27M 47.6MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "enwiki-4k.pth 92%[=================> ] 945.53M 50.1MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "nwiki-4k.pth 94%[=================> ] 960.79M 48.5MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "wiki-4k.pth 96%[==================> ] 976.55M 49.3MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "iki-4k.pth 97%[==================> ] 991.82M 50.0MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ki-4k.pth 99%[==================> ] 1007M 49.7MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "i-4k.pth 99%[==================> ] 1016M 48.8MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2048-E0_1- 100%[===================>] 1017M 48.7MB/s in 21s \r\n", + "\r\n", + "2023-09-06 17:25:12 (47.8 MB/s) - ‘v5r3-L6-D2048-E0_1-enwiki-4k.pth’ saved [1066536937/1066536937]\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1018M\r\n", + "drwxr-xr-x 2 root root 4.0K Sep 6 17:24 .\r\n", + "drwxr-xr-x 20 root root 4.0K Sep 6 17:24 ..\r\n", + "-rw-r--r-- 1 root root 1018M Sep 6 17:07 v5r3-L6-D2048-E0_1-enwiki-4k.pth\r\n" + ] + } + ], + "source": [ + "# Download the model directly (stop gap till HF sync issues is resolved)\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-enwiki-4k.pth\"\n", + "\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " ls -alh ." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a7640f2b", + "metadata": { + "papermill": { + "duration": 0.012199, + "end_time": "2023-09-06T17:25:12.831966", + "exception": false, + "start_time": "2023-09-06T17:25:12.819767", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Enwiki Stage 2 : Basic Instruct Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dbfa1a63", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:25:12.852580Z", + "iopub.status.busy": "2023-09-06T17:25:12.851982Z", + "iopub.status.idle": "2023-09-06T17:25:20.359135Z", + "shell.execute_reply": "2023-09-06T17:25:20.357791Z" + }, + "papermill": { + "duration": 7.52148, + "end_time": "2023-09-06T17:25:20.362711", + "exception": false, + "start_time": "2023-09-06T17:25:12.841231", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Saving the dataset (0/1 shards): 0%| | 0/14932 [00:00=12.1), as this is known to have freeze issues\r\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", + "#\r\n", + "\r\n", + "[RWKV.model] Configuring optimizer with\r\n", + " - lr_init: 4.000e-04 (0.0004)\r\n", + " - lr_final: 3.000e-04 (0.0003)\r\n", + "\r\n", + "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Detected CUDA files, patching ldflags\r\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\r\n", + "Building extension module fused_adam...\r\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ninja: no work to do.\r\n", + "Loading extension module fused_adam...\r\n", + "Time to load fused_adam op: 0.08642840385437012 seconds\r\n", + "Loading `train_dataloader` to estimate number of stepping batches.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rank: 0 partition count [1, 1] and sizes[(533245952, False), (384, False)] \r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + " | Name | Type | Params\r\n", + "--------------------------------------\r\n", + "0 | emb | Embedding | 102 M \r\n", + "1 | blocks | ModuleList | 327 M \r\n", + "2 | ln_out | LayerNorm | 4.1 K \r\n", + "3 | head | Linear | 102 M \r\n", + "--------------------------------------\r\n", + "533 M Trainable params\r\n", + "0 Non-trainable params\r\n", + "533 M Total params\r\n", + "2,132.985 Total estimated model params size (MB)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Training: 0it [00:00, ?it/s]\r", + "Training: 0%| | 0/14932 [00:00\r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 542, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2112, in load_dataset\r\n", + " builder_instance = load_dataset_builder(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1798, in load_dataset_builder\r\n", + " dataset_module = dataset_module_factory(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1413, in dataset_module_factory\r\n", + " ).get_module()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 948, in get_module\r\n", + " patterns = sanitize_patterns(self.data_files) if self.data_files is not None else get_data_patterns(base_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/data_files.py\", line 459, in get_data_patterns\r\n", + " raise EmptyDatasetError(f\"The directory at {base_path} doesn't contain any data files\") from None\r\n", + "datasets.data_files.EmptyDatasetError: The directory at /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset doesn't contain any data files\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: - 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \\ 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: / 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/xuck99wm\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v27\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_175222-xuck99wm/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-instruct.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Instruct (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-instruct/\" \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-enwiki-instruct.pth\" \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c93850c1", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:39.516649Z", + "iopub.status.busy": "2023-09-06T17:52:39.515772Z", + "iopub.status.idle": "2023-09-06T17:52:43.445323Z", + "shell.execute_reply": "2023-09-06T17:52:43.444433Z" + }, + "papermill": { + "duration": 4.911758, + "end_time": "2023-09-06T17:52:43.448281", + "exception": false, + "start_time": "2023-09-06T17:52:38.536523", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:52:41,942] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2048-E0_1-mem-instruct/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L6-D2048-E0_1-mem-instruct.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-instruct/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-instruct.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9cbbdc1c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:45.335301Z", + "iopub.status.busy": "2023-09-06T17:52:45.334570Z", + "iopub.status.idle": "2023-09-06T17:52:45.599928Z", + "shell.execute_reply": "2023-09-06T17:52:45.598870Z" + }, + "papermill": { + "duration": 1.153922, + "end_time": "2023-09-06T17:52:45.602059", + "exception": false, + "start_time": "2023-09-06T17:52:44.448137", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "markdown", + "id": "9759931b", + "metadata": { + "papermill": { + "duration": 0.980039, + "end_time": "2023-09-06T17:52:47.551588", + "exception": false, + "start_time": "2023-09-06T17:52:46.571549", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Tune 2 : Low ctx size (512), memory training\n", + "\n", + "- Tune 2: Low ctx size (512), Training with instruction & input masked. This forces the actual memory training on the output tokens." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "e4b68f37", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:49.396209Z", + "iopub.status.busy": "2023-09-06T17:52:49.395658Z", + "iopub.status.idle": "2023-09-06T17:52:49.450143Z", + "shell.execute_reply": "2023-09-06T17:52:49.449447Z" + }, + "papermill": { + "duration": 1.025106, + "end_time": "2023-09-06T17:52:49.452088", + "exception": false, + "start_time": "2023-09-06T17:52:48.426982", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 8.0K\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 2 root root 4.0K Sep 6 17:52 .\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 6 root root 4.0K Sep 6 17:52 ..\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ../dataset\n", + "rm -rf ../dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# We switch over to fully masked instruct+input, to properly learn the memorization task\n", + "#\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 5000 &\n", + "for i in {5..95..5} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 5000 & \n", + "done\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-100-count.jsonl 100 5000 &\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-200-count.jsonl 200 5000 &\n", + "\n", + "#\n", + "# We mixin the shuffled word list, so that we ensure all words / tokens are learned\n", + "# however this might intrduce an exclusion bias (if seen this word, never repeat it), \n", + "# so we limit the mixture of this data samples\n", + "#\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-10-count.jsonl 10 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-15-count.jsonl 15 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-25-count.jsonl 25 30 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-50-count.jsonl 50 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-75-count.jsonl 75 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-100-count.jsonl 100 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-200-count.jsonl 200 50 &\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ../dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "dfe7c26c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:51.295342Z", + "iopub.status.busy": "2023-09-06T17:52:51.294782Z", + "iopub.status.idle": "2023-09-06T17:53:11.646941Z", + "shell.execute_reply": "2023-09-06T17:53:11.645847Z" + }, + "papermill": { + "duration": 21.229481, + "end_time": "2023-09-06T17:53:11.649754", + "exception": false, + "start_time": "2023-09-06T17:52:50.420273", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:52:56,145] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-instruct.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-instruct.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1381932438\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 1381932438\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: - Waiting for wandb.init()...\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \\ Waiting for wandb.init()...\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230906_175300-6yfdmqhq\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/6yfdmqhq\u001b[0m\r\n", + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", + " self.instantiate_classes()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", + " self.config_init = self.parser.instantiate_classes(self.config)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", + " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", + " component.instantiate_class(component, cfg)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", + " parent[key] = group.group_class(**value)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 559, in __init__\r\n", + " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", + "ValueError: load_model file '../model/v5r3-L6-D2048-E0_1-mem-instruct.pth' does not exist\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/6yfdmqhq\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v28\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_175300-6yfdmqhq/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-512 (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/\" \\\n", + " --model.lr_init=5e-4 \\\n", + " --model.lr_final=4e-4 \\\n", + " --data.max_token_size=512 \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1 \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a64f121e", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:53:13.678266Z", + "iopub.status.busy": "2023-09-06T17:53:13.677894Z", + "iopub.status.idle": "2023-09-06T17:53:17.614786Z", + "shell.execute_reply": "2023-09-06T17:53:17.613856Z" + }, + "papermill": { + "duration": 4.956429, + "end_time": "2023-09-06T17:53:17.617091", + "exception": false, + "start_time": "2023-09-06T17:53:12.660662", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:53:16,116] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-512/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7b3383b0", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:53:19.466424Z", + "iopub.status.busy": "2023-09-06T17:53:19.465359Z", + "iopub.status.idle": "2023-09-06T17:53:19.731128Z", + "shell.execute_reply": "2023-09-06T17:53:19.730200Z" + }, + "papermill": { + "duration": 1.231112, + "end_time": "2023-09-06T17:53:19.733404", + "exception": false, + "start_time": "2023-09-06T17:53:18.502292", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 1715.908096, + "end_time": "2023-09-06T17:53:21.033393", + "environment_variables": {}, + "exception": null, + "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb", + "parameters": {}, + "start_time": "2023-09-06T17:24:45.125297", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb deleted file mode 100644 index 31c7d50e0c7c5197bc24cd44c364ab59a245d4e8..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:04b893c23868d4438d1733d8b85afe79f3065c7074b4f3d1ebc27a0b29b350a0 -size 52755279 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb deleted file mode 100644 index 7e206cc12ec5ccf27fa871d361750dce04655d13..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0439ce7fb6866af36cb53bbddaf6a1ed49656c85a84d6a2aabd6754b30fa2109 -size 61159745 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb deleted file mode 100644 index 7ffd7172b586e0b8857d504b71f3808861d311aa..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3855c5ed19072f2ecaec4294c3945a3290e692d15c0aa7351c7d4917404fbf65 -size 38208798 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb deleted file mode 100644 index 2b40078b2fa26fff7d07cf99fa6ea18a8e9a50b1..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e318ecc4d20d89232f8a0677c54f8489d602cb3d66632cadb29c028c917eb00e -size 30322339 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth index 6b524ff4d5b17f08354f3346a9320b826b489ff9..ce2745ae53706070d3d5b763f87844aac4ce17d5 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a98bb4421b50083bfbb0e4233838456ff812373a85d2bf87d23cdb8aa6b8d702 +oid sha256:7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e size 1066536937 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth index ccb59159ba5b615cb7e3a06a2a6bdc929502ce52..112045636f4926df8441ee57606b58ed5b30a86f 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a83bdbbf6d686bfa77529fc9bbde3a91fc8d182e1dc33ce8d18f2a0abbe2576 +oid sha256:f3f8a205f62d0b8c773766d39ec62bfd98918e977beba55d333548ad1df81dfe size 1066537777 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth deleted file mode 100644 index 8af2cc985147220953dab23cbe6635297e1208ab..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf3a15d56db013d138bed6780d58c4362ca96b3ef98fb98e2d1444f325c582b5 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth deleted file mode 100644 index ec338ad0a412b427fe4cc7115959eb96face9f73..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76cc2d79013781f18c6507848a138150084d37c3aae0f20145e7e5854bcabb99 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth deleted file mode 100644 index ae6d693b3aafaef43037882c09d0bebc48f35c22..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f1b8811c3f2f8c12c564edb973392bb486c9d674152f3eb769ac206c20bcfc0 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth deleted file mode 100644 index baae90af34d9f56b91e9010b43112a84b2184fb0..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f -size 1066537217 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth deleted file mode 100644 index 18bf2327f7771b99c67b0d997cbdf1ba8f34902d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db7ab09a447f96d9fcdbee4761bd35f2c3bce9868d3136959ed601a8e478083c -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-instruct.pth deleted file mode 100644 index 063c157afbc559f19f9a7a0eb7d82adfc2eb06ce..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-instruct.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9ec28149b79524846c14ec90b0a206a3529abb584575553320f2427be475225 -size 1066537357 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth index 1436a34a06eb09d5b2334edf591409a6c7681d4c..ab0a70373f4499e6d8ceca3ce6107ebcf0ef2f82 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98ff68af85a7db87d6e8aeb68d8e36d403dc41e947e090dcbc8e13dbeeb50406 +oid sha256:ea0b14c5aafb10ad2506806e44a34c09ad518076fc485ab5dfec004219cf6db5 size 1066537497 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb deleted file mode 100644 index 6518411075f0671db66215e751dcfb84e7744907..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0977926c2d6b24d07dbdbaff2712954f647d5b697863bc918c4ffc0f431f572 -size 24384207 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb deleted file mode 100644 index 4db294f20953a07e8a6d96806c7609973258cd21..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:97d1563b2840253fdb03cb024ff0447bb295fdc9a3c255c4c39bc01eb46720a0 -size 48457692 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb deleted file mode 100644 index 67972444f9ed3d06fa1d595845dd1bbdebb54e5f..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46167bb1edba84bd184533d3d424a8c4d9ed5a15d8b93474596453f235462f13 -size 58866814 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb deleted file mode 100644 index d2df3a3f63985ebd40d91463248c0db8fe7ba56b..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17f586e9a94c3fbf463e84c518ca7712b6fd539d3c8dda8e4893115a1298c8d4 -size 37212168 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb deleted file mode 100644 index 4c2298987669d56cd07135b0ec4f9da50c75303c..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b5c4dec751996e61882229f30bba0e005ce01e44319a1b75011aacad7575fc0 -size 30004883 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-4k.pth deleted file mode 100644 index a0c87934877f4b602fb1837f3622e834c68df8f8..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2cd66a2944fa9937d02db9ca207d45532d46f6bea1b1d8b0110da9070284e336 -size 1537632233 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-instruct.pth deleted file mode 100644 index 9d678fc0b0c7e5c727a676497397f4fa3acbdd6c..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-instruct.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:034212556c2d5dd6eca8d12ee3ec0daf4aad7cddeab006934130e7fdab4a2b34 -size 1537633073 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth deleted file mode 100644 index b525c72fd31827ffe8aadfecb883cf5b3cceb8f6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d91f2c3f5b96e9d249342bdead58f58d3b1f5ab7c92401a50ab4e5170ae2636 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth deleted file mode 100644 index 4aade76d3bbd951a784adce9596a631a5c1640e8..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a00e8cbc2222bb853dc5f83fe3d6f4c43f4b970cc554be37fb937d476e3eaf88 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth deleted file mode 100644 index 4b8c0bd7e1c67c46e2676c3c9141d7c46019114d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cc22ca95e9a2054534ef8fcce63cf2d0ce65916b39318ce650debd41adee876 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth deleted file mode 100644 index 60365f13edf8b2534dc2ec815b0cc8e327a79570..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498 -size 1537632513 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth deleted file mode 100644 index 242c0b4bb345f3db30feafae94d0928d82498501..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:28d84ce479bb5ca4e3a226f9eb03266c344a398bfff8f420af4ae5598f23fe86 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-instruct.pth deleted file mode 100644 index a0dce592c711e7aa9e3268e4856ed36b1995db1e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-instruct.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4da514e31111bd781ef43fd38c278c49c8e3228c9546dacebc3aaa1710d33753 -size 1537632653 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-neox-init.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-neox-init.pth deleted file mode 100644 index 78b68d75077f38b3b22e07195a4ab7b0e4f4bfc2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-neox-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba3acd62e52a17868b280dc1c746498810014b39eb1282c952e45a1dd1bdc058 -size 1537632793 diff --git a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth b/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth deleted file mode 100644 index 1bf1e3dc51ade1b71678c5f59d05067dfad6a652..0000000000000000000000000000000000000000 --- a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:327bb880f90f80ba9d119f0ca43fb108994e02c5935ec0384a619b092fc2f341 -size 5774098255 diff --git a/manual-uploads/3B-code/_anchor b/manual-uploads/3B-code/_anchor deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth deleted file mode 100644 index 85b2cc712939163ad6ffc54bc460941d4b9d38cd..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1844acad3a36721d4427efa928dd7bbe84bff6ec98ceb310db33987106672a8d -size 3155687506 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth deleted file mode 100644 index fbcd575d03307f44cf24f2855f831273aa31a819..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a8289e17931e0d3ed2cc213eaa66e1ce12f005c69030a9afb38b33987f8877b -size 6126236920 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth deleted file mode 100644 index ee19bc64b756b3f670b76b0754f8c9bb20960aab..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3faf38a7820276bc2dc36d27259d7067c56aa228ec5dd72f743dfc9d72ff3988 -size 15036330880 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth deleted file mode 100644 index 414398090acb93c53e824c8b21fd9a840dc301e6..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0dac80051873f2fc1bb4645d7986330b49976520ddad6574ab4ad4d3dc3bdc15 -size 3155687506 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth deleted file mode 100644 index 8e7c0e5c5233a0b493e06a7d261ab72a14e68d71..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11238a58929e3cb5c4cfe2777e555c66f01a09e391361e6cc30143eb5360e1ac -size 6126236920 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth deleted file mode 100644 index d46afdf695f7e39687ceae0cafb215d175b58936..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aeea26eac9b77fdc82484533e98374cc06b08ab2084d7e7f062325b86a912b -size 15036330880 diff --git a/manual-uploads/RWKV-v5-memory-test/_anchor.txt b/manual-uploads/RWKV-v5-memory-test/_anchor.txt deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000