[GHA] trainer-v4-unit-test/model-init.ipynb result notebooks

by picocreator - opened Aug 23, 2023

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+300

-0

Files changed (1) hide show

actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/trainer-v4-unit-test/model-init.ipynb +300 -0

actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/trainer-v4-unit-test/model-init.ipynb ADDED Viewed

	@@ -0,0 +1,300 @@

+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "a6359476",
+   "metadata": {
+    "papermill": {
+     "duration": 0.002323,
+     "end_time": "2023-08-23T10:26:43.282956",
+     "exception": false,
+     "start_time": "2023-08-23T10:26:43.280633",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Model Init\n",
+    "\n",
+    "Test that the model init code runs without issues.\n",
+    "\n",
+    "**L6-D512 model with:**\n",
+    "- Layer count: 6\n",
+    "- Embed size: 512"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "6c73e486",
+   "metadata": {
+    "notebookRunGroups": {
+     "groupValue": ""
+    },
+    "papermill": {
+     "duration": 0.001515,
+     "end_time": "2023-08-23T10:26:43.286466",
+     "exception": false,
+     "start_time": "2023-08-23T10:26:43.284951",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Preparing the init model and test dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "fcface89",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-23T10:26:43.291673Z",
+     "iopub.status.busy": "2023-08-23T10:26:43.291157Z",
+     "iopub.status.idle": "2023-08-23T10:26:44.037056Z",
+     "shell.execute_reply": "2023-08-23T10:26:44.036039Z"
+    },
+    "papermill": {
+     "duration": 0.751285,
+     "end_time": "2023-08-23T10:26:44.039482",
+     "exception": false,
+     "start_time": "2023-08-23T10:26:43.288197",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# First, let's set up the various directories\n",
+    "!mkdir -p ../../model/\n",
+    "!mkdir -p ../../datapath/\n",
+    "!mkdir -p ../../checkpoint/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "b747f284",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-23T10:26:44.045133Z",
+     "iopub.status.busy": "2023-08-23T10:26:44.044626Z",
+     "iopub.status.idle": "2023-08-23T10:26:53.053696Z",
+     "shell.execute_reply": "2023-08-23T10:26:53.052569Z"
+    },
+    "papermill": {
+     "duration": 9.015161,
+     "end_time": "2023-08-23T10:26:53.056640",
+     "exception": false,
+     "start_time": "2023-08-23T10:26:44.041479",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-08-23 10:26:48,317] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "---- Initializing model ----\r\n",
+      "No of layers: 6\r\n",
+      "Embedding size: 512\r\n",
+      "Output model path: ../model/L6-D512-neox-init.pth\r\n",
+      "Vocab size: 50277\r\n",
+      "Note: this process takes a significant time (and ram) for large models\r\n",
+      "---- ----- ----\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Detected CUDA files, patching ldflags\r\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/wkv_1_bf16/build.ninja...\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building extension module wkv_1_bf16...\r\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n",
+      "ninja: no work to do.\r\n",
+      "Loading extension module wkv_1_bf16...\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model]: Finished initial model load\r\n",
+      "50277 512   -0.0001 emb.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.0.att.key.weight\r\n",
+      "512   512   1.0  blocks.0.att.value.weight\r\n",
+      "512   512   0    blocks.0.att.receptance.weight\r\n",
+      "512   512   0    blocks.0.att.output.weight\r\n",
+      "2048  512   1.0  blocks.0.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.0.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.0.ffn.value.weight\r\n",
+      "512   512   0    blocks.1.att.key.weight\r\n",
+      "512   512   1.0  blocks.1.att.value.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.1.att.receptance.weight\r\n",
+      "512   512   0    blocks.1.att.output.weight\r\n",
+      "2048  512   1.0  blocks.1.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.1.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.1.ffn.value.weight\r\n",
+      "512   512   0    blocks.2.att.key.weight\r\n",
+      "512   512   1.0  blocks.2.att.value.weight\r\n",
+      "512   512   0    blocks.2.att.receptance.weight\r\n",
+      "512   512   0    blocks.2.att.output.weight\r\n",
+      "2048  512   1.0  blocks.2.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.2.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.2.ffn.value.weight\r\n",
+      "512   512   0    blocks.3.att.key.weight\r\n",
+      "512   512   1.0  blocks.3.att.value.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.3.att.receptance.weight\r\n",
+      "512   512   0    blocks.3.att.output.weight\r\n",
+      "2048  512   1.0  blocks.3.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.3.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.3.ffn.value.weight\r\n",
+      "512   512   0    blocks.4.att.key.weight\r\n",
+      "512   512   1.0  blocks.4.att.value.weight\r\n",
+      "512   512   0    blocks.4.att.receptance.weight\r\n",
+      "512   512   0    blocks.4.att.output.weight\r\n",
+      "2048  512   1.0  blocks.4.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.4.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.4.ffn.value.weight\r\n",
+      "512   512   0    blocks.5.att.key.weight\r\n",
+      "512   512   1.0  blocks.5.att.value.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.5.att.receptance.weight\r\n",
+      "512   512   0    blocks.5.att.output.weight\r\n",
+      "2048  512   1.0  blocks.5.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.5.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.5.ffn.value.weight\r\n",
+      "50277 512   0.5  head.weight\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's initialize the L6-D512 model using the init_model.py script\n",
+    "!cd ../../RWKV-v4neo/ && python3 init_model.py --n_layer 6 --n_embd 512 --vocab_size neox ../model/L6-D512-neox-init.pth"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "rwkv-exp",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 11.337437,
+   "end_time": "2023-08-23T10:26:53.383943",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/model-init.ipynb",
+   "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb",
+   "parameters": {},
+   "start_time": "2023-08-23T10:26:42.046506",
+   "version": "2.4.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}