diff --git "a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" "b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" --- "a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" +++ "b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" @@ -3,13 +3,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "ef458e0c", + "id": "bb9e93fd", "metadata": { "papermill": { - "duration": 0.002614, - "end_time": "2023-09-29T05:06:25.725060", + "duration": 0.003202, + "end_time": "2023-09-29T05:48:15.621385", "exception": false, - "start_time": "2023-09-29T05:06:25.722446", + "start_time": "2023-09-29T05:48:15.618183", "status": "completed" }, "tags": [] @@ -23,13 +23,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "58eb3f3e", + "id": "ec0b74ac", "metadata": { "papermill": { - "duration": 0.00201, - "end_time": "2023-09-29T05:06:25.730966", + "duration": 0.002532, + "end_time": "2023-09-29T05:48:15.628420", "exception": false, - "start_time": "2023-09-29T05:06:25.728956", + "start_time": "2023-09-29T05:48:15.625888", "status": "completed" }, "tags": [] @@ -41,19 +41,19 @@ { "cell_type": "code", "execution_count": 1, - "id": "e0abbad9", + "id": "89d0e842", "metadata": { "execution": { - "iopub.execute_input": "2023-09-29T05:06:25.737449Z", - "iopub.status.busy": "2023-09-29T05:06:25.736495Z", - "iopub.status.idle": "2023-09-29T05:06:26.482958Z", - "shell.execute_reply": "2023-09-29T05:06:26.482054Z" + "iopub.execute_input": "2023-09-29T05:48:15.635765Z", + "iopub.status.busy": "2023-09-29T05:48:15.635042Z", + "iopub.status.idle": "2023-09-29T05:48:16.384023Z", + "shell.execute_reply": "2023-09-29T05:48:16.383082Z" }, "papermill": { - "duration": 0.751859, - "end_time": "2023-09-29T05:06:26.485032", + "duration": 0.755093, + "end_time": "2023-09-29T05:48:16.386307", "exception": false, - "start_time": "2023-09-29T05:06:25.733173", + "start_time": "2023-09-29T05:48:15.631214", "status": "completed" }, "tags": [] @@ -69,19 +69,19 @@ { "cell_type": "code", "execution_count": 2, - "id": "42d56a7f", + "id": "eb4d593c", "metadata": { "execution": { - "iopub.execute_input": "2023-09-29T05:06:26.491452Z", - "iopub.status.busy": "2023-09-29T05:06:26.490928Z", - "iopub.status.idle": "2023-09-29T05:06:26.499148Z", - "shell.execute_reply": "2023-09-29T05:06:26.498384Z" + "iopub.execute_input": "2023-09-29T05:48:16.393985Z", + "iopub.status.busy": "2023-09-29T05:48:16.393428Z", + "iopub.status.idle": "2023-09-29T05:48:16.401146Z", + "shell.execute_reply": "2023-09-29T05:48:16.400448Z" }, "papermill": { - "duration": 0.013307, - "end_time": "2023-09-29T05:06:26.500768", + "duration": 0.013429, + "end_time": "2023-09-29T05:48:16.402758", "exception": false, - "start_time": "2023-09-29T05:06:26.487461", + "start_time": "2023-09-29T05:48:16.389329", "status": "completed" }, "tags": [] @@ -140,19 +140,19 @@ { "cell_type": "code", "execution_count": 3, - "id": "5514ed91", + "id": "02a8c148", "metadata": { "execution": { - "iopub.execute_input": "2023-09-29T05:06:26.507274Z", - "iopub.status.busy": "2023-09-29T05:06:26.506786Z", - "iopub.status.idle": "2023-09-29T05:06:55.991075Z", - "shell.execute_reply": "2023-09-29T05:06:55.990231Z" + "iopub.execute_input": "2023-09-29T05:48:16.410433Z", + "iopub.status.busy": "2023-09-29T05:48:16.409951Z", + "iopub.status.idle": "2023-09-29T05:48:45.475788Z", + "shell.execute_reply": "2023-09-29T05:48:45.474892Z" }, "papermill": { - "duration": 29.490941, - "end_time": "2023-09-29T05:06:55.994238", + "duration": 29.072394, + "end_time": "2023-09-29T05:48:45.478206", "exception": false, - "start_time": "2023-09-29T05:06:26.503297", + "start_time": "2023-09-29T05:48:16.405812", "status": "completed" }, "tags": [] @@ -162,20 +162,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2023-09-29 05:06:30,625] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + "[2023-09-29 05:48:20,485] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n", "---- Initializing model ----\r\n", "No of layers: 6\r\n", "Embedding size: 2048\r\n", @@ -234,42 +228,42 @@ "output_type": "stream", "text": [ "2048 2048 0 blocks.0.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.0.ffn.value.weight\r\n", - "2048 2048 1.0 blocks.1.att.gate.weight\r\n" + "2048 7168 0 blocks.0.ffn.value.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 1.0 blocks.1.att.receptance.weight\r\n" + "2048 2048 1.0 blocks.1.att.gate.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 1.0 blocks.1.att.key.weight\r\n" + "2048 2048 1.0 blocks.1.att.receptance.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 1.0 blocks.1.att.value.weight\r\n" + "2048 2048 1.0 blocks.1.att.key.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 0 blocks.1.att.output.weight\r\n" + "2048 2048 1.0 blocks.1.att.value.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "2048 2048 0 blocks.1.att.output.weight\r\n", "7168 2048 1.0 blocks.1.ffn.key.weight\r\n" ] }, @@ -278,42 +272,42 @@ "output_type": "stream", "text": [ "2048 2048 0 blocks.1.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.1.ffn.value.weight\r\n" + "2048 7168 0 blocks.1.ffn.value.weight\r\n", + "2048 2048 1.0 blocks.2.att.gate.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 1.0 blocks.2.att.gate.weight\r\n" + "2048 2048 1.0 blocks.2.att.receptance.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 1.0 blocks.2.att.receptance.weight\r\n" + "2048 2048 1.0 blocks.2.att.key.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 1.0 blocks.2.att.key.weight\r\n" + "2048 2048 1.0 blocks.2.att.value.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 1.0 blocks.2.att.value.weight\r\n" + "2048 2048 0 blocks.2.att.output.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 0 blocks.2.att.output.weight\r\n", "7168 2048 1.0 blocks.2.ffn.key.weight\r\n" ] }, @@ -321,14 +315,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 0 blocks.2.ffn.receptance.weight\r\n" + "2048 2048 0 blocks.2.ffn.receptance.weight\r\n", + "2048 7168 0 blocks.2.ffn.value.weight\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "2048 7168 0 blocks.2.ffn.value.weight\r\n", "2048 2048 1.0 blocks.3.att.gate.weight\r\n" ] }, @@ -366,7 +360,13 @@ "output_type": "stream", "text": [ "2048 2048 0 blocks.3.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.3.ffn.value.weight\r\n", + "2048 7168 0 blocks.3.ffn.value.weight\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "2048 2048 1.0 blocks.4.att.gate.weight\r\n" ] }, @@ -404,7 +404,13 @@ "output_type": "stream", "text": [ "2048 2048 0 blocks.4.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.4.ffn.value.weight\r\n", + "2048 7168 0 blocks.4.ffn.value.weight\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "2048 2048 1.0 blocks.5.att.gate.weight\r\n" ] }, @@ -433,7 +439,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "2048 2048 0 blocks.5.att.output.weight\r\n", + "2048 2048 0 blocks.5.att.output.weight\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "7168 2048 1.0 blocks.5.ffn.key.weight\r\n" ] }, @@ -459,13 +471,13 @@ }, { "cell_type": "markdown", - "id": "8afd9e50", + "id": "92eb36b4", "metadata": { "papermill": { - "duration": 0.005752, - "end_time": "2023-09-29T05:06:56.006385", + "duration": 0.00572, + "end_time": "2023-09-29T05:48:45.490182", "exception": false, - "start_time": "2023-09-29T05:06:56.000633", + "start_time": "2023-09-29T05:48:45.484462", "status": "completed" }, "tags": [] @@ -477,19 +489,19 @@ { "cell_type": "code", "execution_count": 4, - "id": "ff78d2bd", + "id": "b6db7965", "metadata": { "execution": { - "iopub.execute_input": "2023-09-29T05:06:56.020959Z", - "iopub.status.busy": "2023-09-29T05:06:56.020447Z", - "iopub.status.idle": "2023-09-29T05:07:01.579575Z", - "shell.execute_reply": "2023-09-29T05:07:01.578476Z" + "iopub.execute_input": "2023-09-29T05:48:45.504239Z", + "iopub.status.busy": "2023-09-29T05:48:45.503871Z", + "iopub.status.idle": "2023-09-29T05:55:02.308216Z", + "shell.execute_reply": "2023-09-29T05:55:02.307296Z" }, "papermill": { - "duration": 5.569483, - "end_time": "2023-09-29T05:07:01.582319", + "duration": 376.815064, + "end_time": "2023-09-29T05:55:02.311331", "exception": false, - "start_time": "2023-09-29T05:06:56.012836", + "start_time": "2023-09-29T05:48:45.496267", "status": "completed" }, "tags": [] @@ -499,35 +511,17232 @@ "name": "stdout", "output_type": "stream", "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/preload_datapath.py\", line 20, in \r\n", - " assert os.path.exists(config_file), \"Config file does not exist\"\r\n", - "AssertionError: Config file does not exist\r\n" + "\r", + "Downloading readme: 0%| | 0.00/433 [00:00