diff --git "a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" "b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" --- "a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" +++ "b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb" @@ -1,15426 +1,3 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "215f6d68", - "metadata": { - "papermill": { - "duration": 0.00298, - "end_time": "2023-09-29T06:34:37.429814", - "exception": false, - "start_time": "2023-09-29T06:34:37.426834", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# RWKV v5 multi-size training experiment\n", - "\n", - "**Note:** This project assumes you have the rwkv-infctx conda env setup" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "bf0cf97c", - "metadata": { - "papermill": { - "duration": 0.002312, - "end_time": "2023-09-29T06:34:37.435725", - "exception": false, - "start_time": "2023-09-29T06:34:37.433413", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Basic Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "98d95606", - "metadata": { - "execution": { - "iopub.execute_input": "2023-09-29T06:34:37.440037Z", - "iopub.status.busy": "2023-09-29T06:34:37.439753Z", - "iopub.status.idle": "2023-09-29T06:34:38.107502Z", - "shell.execute_reply": "2023-09-29T06:34:38.106666Z" - }, - "papermill": { - "duration": 0.672001, - "end_time": "2023-09-29T06:34:38.109558", - "exception": false, - "start_time": "2023-09-29T06:34:37.437557", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# First lets setup the various directories, and init the model\n", - "!mkdir -p ../../../../model/\n", - "!mkdir -p ../../../../datapath/\n", - "!mkdir -p ../../../../checkpoint/" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d799a503", - "metadata": { - "execution": { - "iopub.execute_input": "2023-09-29T06:34:38.116182Z", - "iopub.status.busy": "2023-09-29T06:34:38.115939Z", - "iopub.status.idle": "2023-09-29T06:34:38.124075Z", - "shell.execute_reply": "2023-09-29T06:34:38.123428Z" - }, - "papermill": { - "duration": 0.012774, - "end_time": "2023-09-29T06:34:38.125078", - "exception": false, - "start_time": "2023-09-29T06:34:38.112304", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DEEPSPEED_STRAT: deepspeed_stage_1\n", - "ENABLE_WANDB: True\n", - "GPU_DEVICES: auto\n", - "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train\n", - "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" - ] - } - ], - "source": [ - "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", - "GPU_DEVICES=\"auto\"\n", - "ENABLE_WANDB=True\n", - "\n", - "EMBED_SCALE=0.01\n", - "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", - "\n", - "LAYER_COUNT=6\n", - "EMBED_SIZE=2048\n", - "\n", - "WANDB_PREFIX=f\"[Multi-size] v5-L{LAYER_COUNT}-D{EMBED_SIZE}-E{EMBED_SCALE}\"\n", - "FILENAME_PREFIX=f\"v5-L{LAYER_COUNT}-D{EMBED_SIZE}-E{EMBED_SCALE_LABEL}\"\n", - "\n", - "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", - "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", - "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", - "\n", - "if ENABLE_WANDB:\n", - " WANDB_MODE=\"online\"\n", - "else:\n", - " WANDB_MODE=\"disabled\"\n", - "\n", - "# Computing the notebook, and various paths\n", - "import os\n", - "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", - "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", - "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "\n", - "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", - "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", - "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", - "print(\"PROJECT_DIR:\", PROJECT_DIR)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e4204bbd", - "metadata": { - "execution": { - "iopub.execute_input": "2023-09-29T06:34:38.128341Z", - "iopub.status.busy": "2023-09-29T06:34:38.128199Z", - "iopub.status.idle": "2023-09-29T06:34:57.534218Z", - "shell.execute_reply": "2023-09-29T06:34:57.533501Z" - }, - "papermill": { - "duration": 19.409767, - "end_time": "2023-09-29T06:34:57.536209", - "exception": false, - "start_time": "2023-09-29T06:34:38.126442", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-09-29 06:34:40,927] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n", - "---- Initializing model ----\r\n", - "No of layers: 6\r\n", - "Embedding size: 2048\r\n", - "Output model path: ../model/v5-L6-D2048-E0_01-neox-v5base-init.pth\r\n", - "Vocab size: 50277\r\n", - "Emb scale: 0.01\r\n", - "Note: this process takes a significant time (and ram) for large models\r\n", - "---- ----- ----\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "50277 2048 -0.01 emb.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.0.att.gate.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.0.att.receptance.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.0.att.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.0.att.value.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.0.att.output.weight\r\n", - "7168 2048 1.0 blocks.0.ffn.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.0.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.0.ffn.value.weight\r\n", - "2048 2048 1.0 blocks.1.att.gate.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.1.att.receptance.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.1.att.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.1.att.value.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.1.att.output.weight\r\n", - "7168 2048 1.0 blocks.1.ffn.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.1.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.1.ffn.value.weight\r\n", - "2048 2048 1.0 blocks.2.att.gate.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.2.att.receptance.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.2.att.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.2.att.value.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.2.att.output.weight\r\n", - "7168 2048 1.0 blocks.2.ffn.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.2.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.2.ffn.value.weight\r\n", - "2048 2048 1.0 blocks.3.att.gate.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.3.att.receptance.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.3.att.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.3.att.value.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.3.att.output.weight\r\n", - "7168 2048 1.0 blocks.3.ffn.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.3.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.3.ffn.value.weight\r\n", - "2048 2048 1.0 blocks.4.att.gate.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.4.att.receptance.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.4.att.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.4.att.value.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.4.att.output.weight\r\n", - "7168 2048 1.0 blocks.4.ffn.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.4.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.4.ffn.value.weight\r\n", - "2048 2048 1.0 blocks.5.att.gate.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.5.att.receptance.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.5.att.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 1.0 blocks.5.att.value.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.5.att.output.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7168 2048 1.0 blocks.5.ffn.key.weight\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2048 2048 0 blocks.5.ffn.receptance.weight\r\n", - "2048 7168 0 blocks.5.ffn.value.weight\r\n", - "50277 2048 0.5 head.weight\r\n" - ] - } - ], - "source": [ - "# Init the model\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 ./init_model.py \\\n", - " --n_layer {LAYER_COUNT} --n_embd {EMBED_SIZE} \\\n", - " --emb-scale \"{EMBED_SCALE}\" \\\n", - " --vocab_size neox --skip-if-exists \\\n", - " \"../model/{FILENAME_PREFIX}-neox-v5base-init.pth\"" - ] - }, - { - "cell_type": "markdown", - "id": "25aa156c", - "metadata": { - "papermill": { - "duration": 0.004287, - "end_time": "2023-09-29T06:34:57.545982", - "exception": false, - "start_time": "2023-09-29T06:34:57.541695", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Enwiki Stage 1 : Foundation 4k model training" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "dfb884c6", - "metadata": { - "execution": { - "iopub.execute_input": "2023-09-29T06:34:57.552286Z", - "iopub.status.busy": "2023-09-29T06:34:57.552023Z", - "iopub.status.idle": "2023-09-29T06:35:06.302944Z", - "shell.execute_reply": "2023-09-29T06:35:06.302095Z" - }, - "papermill": { - "duration": 8.756445, - "end_time": "2023-09-29T06:35:06.305010", - "exception": false, - "start_time": "2023-09-29T06:34:57.548565", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "Saving the dataset (0/3 shards): 0%| | 0/54405 [00:00\r\n", - " cli_main()\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n", - " LightningCLI(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", - " self._run_subcommand(self.subcommand)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", - " fn(**fn_kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", - " call._call_and_handle_interrupt(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", - " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", - " return function(*args, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", - " self._run(model, ckpt_path=ckpt_path)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", - " self._data_connector.prepare_data()\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", - " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", - " return fn(*args, **kwargs)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 549, in prepare_data\r\n", - " prepare_data_static(**self._init_locals)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 464, in prepare_data_static\r\n", - " src_dataset[\"train\"] = src_dataset[\"train\"].select(range(offset_val, offset_val + length_val))\r\n", - "TypeError: 'float' object cannot be interpreted as an integer\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6-D2048-E0.01 - Enwiki-4k Part 1 (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/nua9z0t5\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v2\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230929_063511-nua9z0t5/logs\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Start the foundation model training\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", - " python3 lightning_trainer.py fit \\\n", - " -c \"{NOTEBOOK_DIR}/enwiki-4k-part1.yaml\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Enwiki-4k Part 1 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", - " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", - " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-enwiki-4k-p1/\" \\\n", - " --model.load_model=\"../model/{FILENAME_PREFIX}-neox-v5base-init.pth\" \\\n", - " --model.ctx_len=4096 \\\n", - " --model.bptt_learning_range=1" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "e03ffed6", - "metadata": { - "execution": { - "iopub.execute_input": "2023-09-29T06:40:32.715392Z", - "iopub.status.busy": "2023-09-29T06:40:32.715097Z", - "iopub.status.idle": "2023-09-29T06:40:35.440958Z", - "shell.execute_reply": "2023-09-29T06:40:35.440200Z" - }, - "papermill": { - "duration": 2.797683, - "end_time": "2023-09-29T06:40:35.442702", - "exception": false, - "start_time": "2023-09-29T06:40:32.645019", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-09-29 06:40:34,511] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", - " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", - " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", - " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", - "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6-D2048-E0_01-enwiki-4k-p1/last.ckpt/latest\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '../model/v5-L6-D2048-E0_01-enwiki-4k-p1.pth': No such file or directory\r\n" - ] - } - ], - "source": [ - "# Lets export the model from the checkpoint\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-enwiki-4k-p1/last.ckpt\" \"../model/{FILENAME_PREFIX}-enwiki-4k-p1.pth\" \"bf16\"\n", - "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-enwiki-4k-p1.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b2d5fe57", - "metadata": { - "execution": { - "iopub.execute_input": "2023-09-29T06:40:35.588027Z", - "iopub.status.busy": "2023-09-29T06:40:35.587772Z", - "iopub.status.idle": "2023-09-29T06:40:39.405717Z", - "shell.execute_reply": "2023-09-29T06:40:39.404915Z" - }, - "papermill": { - "duration": 3.890278, - "end_time": "2023-09-29T06:40:39.407623", - "exception": false, - "start_time": "2023-09-29T06:40:35.517345", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-09-29 06:40:38,394] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n", - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in \r\n", - " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n", - " self.model = RWKV(**model_config)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6-D2048-E0_01-enwiki-4k-p1.pth' does not exist\r\n" - ] - } - ], - "source": [ - "# # Lets do a quick dragon prompt validation\n", - "!cd \"{INFERENCE_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-enwiki-4k-p1.pth\" \"cuda fp32\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "papermill": { - "default_parameters": {}, - "duration": 363.056705, - "end_time": "2023-09-29T06:40:39.597218", - "environment_variables": {}, - "exception": null, - "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb", - "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb", - "parameters": {}, - "start_time": "2023-09-29T06:34:36.540513", - "version": "2.4.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file +version https://git-lfs.github.com/spec/v1 +oid sha256:7260b3fe80de461d6dc923b21af87361f71e26a4a7191d51dd9665403728ddfa +size 15732960