diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb" new file mode 100644--- /dev/null +++ "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb" @@ -0,0 +1,17243 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "34787543", + "metadata": { + "papermill": { + "duration": 0.004045, + "end_time": "2023-09-06T00:20:51.170319", + "exception": false, + "start_time": "2023-09-06T00:20:51.166274", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env set up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8aa40f90", + "metadata": { + "papermill": { + "duration": 0.002614, + "end_time": "2023-09-06T00:20:51.175858", + "exception": false, + "start_time": "2023-09-06T00:20:51.173244", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "53fff9d1", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T00:20:51.180467Z", + "iopub.status.busy": "2023-09-06T00:20:51.180322Z", + "iopub.status.idle": "2023-09-06T00:20:52.066156Z", + "shell.execute_reply": "2023-09-06T00:20:52.065233Z" + }, + "papermill": { + "duration": 0.890028, + "end_time": "2023-09-06T00:20:52.068428", + "exception": false, + "start_time": "2023-09-06T00:20:51.178400", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize32 checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t RWKV-v5r2\t datapath\toutput\r\n", + "README.md RWKV-v5altwavenet RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5headsize2x RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First 
let's set up the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a388f6e6", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T00:20:52.076313Z", + "iopub.status.busy": "2023-09-06T00:20:52.076079Z", + "iopub.status.idle": "2023-09-06T00:20:54.167404Z", + "shell.execute_reply": "2023-09-06T00:20:54.166623Z" + }, + "papermill": { + "duration": 2.097425, + "end_time": "2023-09-06T00:20:54.169133", + "exception": false, + "start_time": "2023-09-06T00:20:52.071708", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e2f52f13", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T00:20:54.176834Z", + "iopub.status.busy": "2023-09-06T00:20:54.176573Z", + "iopub.status.idle": "2023-09-06T00:20:54.185506Z", + "shell.execute_reply": "2023-09-06T00:20:54.184977Z" + }, + "papermill": { + "duration": 0.014335, + "end_time": "2023-09-06T00:20:54.186704", + "exception": false, + "start_time": "2023-09-06T00:20:54.172369", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L12-D2048-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: 
/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=12\n", + "EMBED_DIM=2048\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Compute the notebook dir (i.e. the kernel's working directory) and the derived project paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": 
"ff222a35", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T00:20:54.193945Z", + "iopub.status.busy": "2023-09-06T00:20:54.193803Z", + "iopub.status.idle": "2023-09-06T00:20:54.416273Z", + "shell.execute_reply": "2023-09-06T00:20:54.415523Z" + }, + "papermill": { + "duration": 0.227953, + "end_time": "2023-09-06T00:20:54.418012", + "exception": false, + "start_time": "2023-09-06T00:20:54.190059", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/bin/sh: 1: cd: can't cd to {TRAINER_DIR}\r\n" + ] + } + ], + "source": [ + "# Init the model\n", + "# NOTE: every {name} below must exist in the kernel, or IPython leaves the whole command unexpanded\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 ./init_model.py \\\n", + " --n_layer \"{LAYER_COUNT}\" --n_embd \"{EMBED_DIM}\" \\\n", + " --emb-scale \"{EMBED_SCALE}\" \\\n", + " --vocab_size neox --skip-if-exists \\\n", + " \"../model/{FILENAME_PREFIX}-neox-init.pth\"" + ] + }, + { + "cell_type": "markdown", + "id": "1f427699", + "metadata": { + "papermill": { + "duration": 0.003102, + "end_time": "2023-09-06T00:20:54.424585", + "exception": false, + "start_time": "2023-09-06T00:20:54.421483", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Enwiki Stage 1 : Foundation 4k model training" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e4a7fe9a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T00:20:54.430076Z", + "iopub.status.busy": "2023-09-06T00:20:54.429584Z", + "iopub.status.idle": "2023-09-06T00:38:40.364127Z", + "shell.execute_reply": "2023-09-06T00:38:40.363334Z" + }, + "papermill": { + "duration": 1065.938908, + "end_time": "2023-09-06T00:38:40.366018", + "exception": false, + "start_time": "2023-09-06T00:20:54.427110", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Downloading readme: 0%| | 0.00/433 
[00:00