picocreator commited on
Commit
ba750a1
1 Parent(s): 550e990

8fa61cb158cb691aa1bb84da41ff32d779b7b4be6043cf0e368ee6c84931c0f0

Browse files
experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb CHANGED
@@ -3,13 +3,13 @@
3
  {
4
  "attachments": {},
5
  "cell_type": "markdown",
6
- "id": "f5ebb021",
7
  "metadata": {
8
  "papermill": {
9
- "duration": 0.002819,
10
- "end_time": "2023-09-29T04:50:11.669839",
11
  "exception": false,
12
- "start_time": "2023-09-29T04:50:11.667020",
13
  "status": "completed"
14
  },
15
  "tags": []
@@ -23,13 +23,13 @@
23
  {
24
  "attachments": {},
25
  "cell_type": "markdown",
26
- "id": "6e6abc3f",
27
  "metadata": {
28
  "papermill": {
29
- "duration": 0.00214,
30
- "end_time": "2023-09-29T04:50:11.676239",
31
  "exception": false,
32
- "start_time": "2023-09-29T04:50:11.674099",
33
  "status": "completed"
34
  },
35
  "tags": []
@@ -41,19 +41,19 @@
41
  {
42
  "cell_type": "code",
43
  "execution_count": 1,
44
- "id": "3d2405bd",
45
  "metadata": {
46
  "execution": {
47
- "iopub.execute_input": "2023-09-29T04:50:11.682830Z",
48
- "iopub.status.busy": "2023-09-29T04:50:11.682139Z",
49
- "iopub.status.idle": "2023-09-29T04:50:12.432460Z",
50
- "shell.execute_reply": "2023-09-29T04:50:12.431486Z"
51
  },
52
  "papermill": {
53
- "duration": 0.756299,
54
- "end_time": "2023-09-29T04:50:12.434815",
55
  "exception": false,
56
- "start_time": "2023-09-29T04:50:11.678516",
57
  "status": "completed"
58
  },
59
  "tags": []
@@ -69,19 +69,19 @@
69
  {
70
  "cell_type": "code",
71
  "execution_count": 2,
72
- "id": "66fd5201",
73
  "metadata": {
74
  "execution": {
75
- "iopub.execute_input": "2023-09-29T04:50:12.446546Z",
76
- "iopub.status.busy": "2023-09-29T04:50:12.446098Z",
77
- "iopub.status.idle": "2023-09-29T04:50:12.454394Z",
78
- "shell.execute_reply": "2023-09-29T04:50:12.453644Z"
79
  },
80
  "papermill": {
81
- "duration": 0.018125,
82
- "end_time": "2023-09-29T04:50:12.456177",
83
  "exception": false,
84
- "start_time": "2023-09-29T04:50:12.438052",
85
  "status": "completed"
86
  },
87
  "tags": []
@@ -140,19 +140,19 @@
140
  {
141
  "cell_type": "code",
142
  "execution_count": 3,
143
- "id": "e0b56789",
144
  "metadata": {
145
  "execution": {
146
- "iopub.execute_input": "2023-09-29T04:50:12.464627Z",
147
- "iopub.status.busy": "2023-09-29T04:50:12.464037Z",
148
- "iopub.status.idle": "2023-09-29T04:50:42.488005Z",
149
- "shell.execute_reply": "2023-09-29T04:50:42.486665Z"
150
  },
151
  "papermill": {
152
- "duration": 30.031629,
153
- "end_time": "2023-09-29T04:50:42.490859",
154
  "exception": false,
155
- "start_time": "2023-09-29T04:50:12.459230",
156
  "status": "completed"
157
  },
158
  "tags": []
@@ -162,14 +162,20 @@
162
  "name": "stdout",
163
  "output_type": "stream",
164
  "text": [
165
- "[2023-09-29 04:50:16,856] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
 
 
 
 
 
 
 
166
  ]
167
  },
168
  {
169
  "name": "stdout",
170
  "output_type": "stream",
171
  "text": [
172
- "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
173
  "---- Initializing model ----\r\n",
174
  "No of layers: 6\r\n",
175
  "Embedding size: 2048\r\n",
@@ -228,42 +234,42 @@
228
  "output_type": "stream",
229
  "text": [
230
  "2048 2048 0 blocks.0.ffn.receptance.weight\r\n",
231
- "2048 7168 0 blocks.0.ffn.value.weight\r\n"
 
232
  ]
233
  },
234
  {
235
  "name": "stdout",
236
  "output_type": "stream",
237
  "text": [
238
- "2048 2048 1.0 blocks.1.att.gate.weight\r\n"
239
  ]
240
  },
241
  {
242
  "name": "stdout",
243
  "output_type": "stream",
244
  "text": [
245
- "2048 2048 1.0 blocks.1.att.receptance.weight\r\n"
246
  ]
247
  },
248
  {
249
  "name": "stdout",
250
  "output_type": "stream",
251
  "text": [
252
- "2048 2048 1.0 blocks.1.att.key.weight\r\n"
253
  ]
254
  },
255
  {
256
  "name": "stdout",
257
  "output_type": "stream",
258
  "text": [
259
- "2048 2048 1.0 blocks.1.att.value.weight\r\n"
260
  ]
261
  },
262
  {
263
  "name": "stdout",
264
  "output_type": "stream",
265
  "text": [
266
- "2048 2048 0 blocks.1.att.output.weight\r\n",
267
  "7168 2048 1.0 blocks.1.ffn.key.weight\r\n"
268
  ]
269
  },
@@ -272,42 +278,42 @@
272
  "output_type": "stream",
273
  "text": [
274
  "2048 2048 0 blocks.1.ffn.receptance.weight\r\n",
275
- "2048 7168 0 blocks.1.ffn.value.weight\r\n",
276
- "2048 2048 1.0 blocks.2.att.gate.weight\r\n"
277
  ]
278
  },
279
  {
280
  "name": "stdout",
281
  "output_type": "stream",
282
  "text": [
283
- "2048 2048 1.0 blocks.2.att.receptance.weight\r\n"
284
  ]
285
  },
286
  {
287
  "name": "stdout",
288
  "output_type": "stream",
289
  "text": [
290
- "2048 2048 1.0 blocks.2.att.key.weight\r\n"
291
  ]
292
  },
293
  {
294
  "name": "stdout",
295
  "output_type": "stream",
296
  "text": [
297
- "2048 2048 1.0 blocks.2.att.value.weight\r\n"
298
  ]
299
  },
300
  {
301
  "name": "stdout",
302
  "output_type": "stream",
303
  "text": [
304
- "2048 2048 0 blocks.2.att.output.weight\r\n"
305
  ]
306
  },
307
  {
308
  "name": "stdout",
309
  "output_type": "stream",
310
  "text": [
 
311
  "7168 2048 1.0 blocks.2.ffn.key.weight\r\n"
312
  ]
313
  },
@@ -360,13 +366,7 @@
360
  "output_type": "stream",
361
  "text": [
362
  "2048 2048 0 blocks.3.ffn.receptance.weight\r\n",
363
- "2048 7168 0 blocks.3.ffn.value.weight\r\n"
364
- ]
365
- },
366
- {
367
- "name": "stdout",
368
- "output_type": "stream",
369
- "text": [
370
  "2048 2048 1.0 blocks.4.att.gate.weight\r\n"
371
  ]
372
  },
@@ -404,13 +404,7 @@
404
  "output_type": "stream",
405
  "text": [
406
  "2048 2048 0 blocks.4.ffn.receptance.weight\r\n",
407
- "2048 7168 0 blocks.4.ffn.value.weight\r\n"
408
- ]
409
- },
410
- {
411
- "name": "stdout",
412
- "output_type": "stream",
413
- "text": [
414
  "2048 2048 1.0 blocks.5.att.gate.weight\r\n"
415
  ]
416
  },
@@ -439,13 +433,7 @@
439
  "name": "stdout",
440
  "output_type": "stream",
441
  "text": [
442
- "2048 2048 0 blocks.5.att.output.weight\r\n"
443
- ]
444
- },
445
- {
446
- "name": "stdout",
447
- "output_type": "stream",
448
- "text": [
449
  "7168 2048 1.0 blocks.5.ffn.key.weight\r\n"
450
  ]
451
  },
@@ -471,13 +459,13 @@
471
  },
472
  {
473
  "cell_type": "markdown",
474
- "id": "e3057f8b",
475
  "metadata": {
476
  "papermill": {
477
- "duration": 0.006306,
478
- "end_time": "2023-09-29T04:50:42.503924",
479
  "exception": false,
480
- "start_time": "2023-09-29T04:50:42.497618",
481
  "status": "completed"
482
  },
483
  "tags": []
@@ -489,19 +477,19 @@
489
  {
490
  "cell_type": "code",
491
  "execution_count": 4,
492
- "id": "c06c6ad2",
493
  "metadata": {
494
  "execution": {
495
- "iopub.execute_input": "2023-09-29T04:50:42.519776Z",
496
- "iopub.status.busy": "2023-09-29T04:50:42.518734Z",
497
- "iopub.status.idle": "2023-09-29T04:50:47.537394Z",
498
- "shell.execute_reply": "2023-09-29T04:50:47.535673Z"
499
  },
500
  "papermill": {
501
- "duration": 5.029265,
502
- "end_time": "2023-09-29T04:50:47.539698",
503
  "exception": false,
504
- "start_time": "2023-09-29T04:50:42.510433",
505
  "status": "completed"
506
  },
507
  "tags": []
@@ -527,19 +515,19 @@
527
  {
528
  "cell_type": "code",
529
  "execution_count": 5,
530
- "id": "4cc7e34f",
531
  "metadata": {
532
  "execution": {
533
- "iopub.execute_input": "2023-09-29T04:50:47.552392Z",
534
- "iopub.status.busy": "2023-09-29T04:50:47.551853Z",
535
- "iopub.status.idle": "2023-09-29T04:50:47.806392Z",
536
- "shell.execute_reply": "2023-09-29T04:50:47.805379Z"
537
  },
538
  "papermill": {
539
- "duration": 0.264553,
540
- "end_time": "2023-09-29T04:50:47.809133",
541
  "exception": false,
542
- "start_time": "2023-09-29T04:50:47.544580",
543
  "status": "completed"
544
  },
545
  "tags": []
@@ -549,17 +537,16 @@
549
  "name": "stdout",
550
  "output_type": "stream",
551
  "text": [
552
- "/usr/bin/sh: 1: cd: can't cd to {TRAINER_DIR}\r\n"
553
  ]
554
  }
555
  ],
556
  "source": [
557
  "# Start the foundation model training\n",
558
  "!cd \"{TRAINER_DIR}\" && \\\n",
559
- " export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
560
  " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
561
  " python lightning_trainer.py fit \\\n",
562
- " -c \"{NOTEBOOK_DIR}/v5base-enwiki-4k.yaml\" \\\n",
563
  " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Enwiki-4k Part 1 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
564
  " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
565
  " --trainer.devices=\"{GPU_DEVICES}\" \\\n",
@@ -572,19 +559,19 @@
572
  {
573
  "cell_type": "code",
574
  "execution_count": 6,
575
- "id": "0b3b8134",
576
  "metadata": {
577
  "execution": {
578
- "iopub.execute_input": "2023-09-29T04:50:47.825099Z",
579
- "iopub.status.busy": "2023-09-29T04:50:47.824495Z",
580
- "iopub.status.idle": "2023-09-29T04:50:48.327589Z",
581
- "shell.execute_reply": "2023-09-29T04:50:48.326466Z"
582
  },
583
  "papermill": {
584
- "duration": 0.514109,
585
- "end_time": "2023-09-29T04:50:48.330177",
586
  "exception": false,
587
- "start_time": "2023-09-29T04:50:47.816068",
588
  "status": "completed"
589
  },
590
  "tags": []
@@ -615,19 +602,19 @@
615
  {
616
  "cell_type": "code",
617
  "execution_count": 7,
618
- "id": "92869fb9",
619
  "metadata": {
620
  "execution": {
621
- "iopub.execute_input": "2023-09-29T04:50:48.346924Z",
622
- "iopub.status.busy": "2023-09-29T04:50:48.346311Z",
623
- "iopub.status.idle": "2023-09-29T04:50:48.600443Z",
624
- "shell.execute_reply": "2023-09-29T04:50:48.599423Z"
625
  },
626
  "papermill": {
627
- "duration": 0.26565,
628
- "end_time": "2023-09-29T04:50:48.603118",
629
  "exception": false,
630
- "start_time": "2023-09-29T04:50:48.337468",
631
  "status": "completed"
632
  },
633
  "tags": []
@@ -637,14 +624,28 @@
637
  "name": "stdout",
638
  "output_type": "stream",
639
  "text": [
640
- "/usr/bin/sh: 1: cd: can't cd to {INFERENCE_DIR}\r\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
641
  ]
642
  }
643
  ],
644
  "source": [
645
  "# # Lets do a quick dragon prompt validation\n",
646
  "!cd \"{INFERENCE_DIR}\" && \\\n",
647
- " export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
648
  " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-enwiki-4k-p1.pth\" \"cuda fp32\""
649
  ]
650
  }
@@ -669,14 +670,14 @@
669
  },
670
  "papermill": {
671
  "default_parameters": {},
672
- "duration": 38.643439,
673
- "end_time": "2023-09-29T04:50:49.032467",
674
  "environment_variables": {},
675
  "exception": null,
676
  "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb",
677
  "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb",
678
  "parameters": {},
679
- "start_time": "2023-09-29T04:50:10.389028",
680
  "version": "2.4.0"
681
  }
682
  },
 
3
  {
4
  "attachments": {},
5
  "cell_type": "markdown",
6
+ "id": "ef458e0c",
7
  "metadata": {
8
  "papermill": {
9
+ "duration": 0.002614,
10
+ "end_time": "2023-09-29T05:06:25.725060",
11
  "exception": false,
12
+ "start_time": "2023-09-29T05:06:25.722446",
13
  "status": "completed"
14
  },
15
  "tags": []
 
23
  {
24
  "attachments": {},
25
  "cell_type": "markdown",
26
+ "id": "58eb3f3e",
27
  "metadata": {
28
  "papermill": {
29
+ "duration": 0.00201,
30
+ "end_time": "2023-09-29T05:06:25.730966",
31
  "exception": false,
32
+ "start_time": "2023-09-29T05:06:25.728956",
33
  "status": "completed"
34
  },
35
  "tags": []
 
41
  {
42
  "cell_type": "code",
43
  "execution_count": 1,
44
+ "id": "e0abbad9",
45
  "metadata": {
46
  "execution": {
47
+ "iopub.execute_input": "2023-09-29T05:06:25.737449Z",
48
+ "iopub.status.busy": "2023-09-29T05:06:25.736495Z",
49
+ "iopub.status.idle": "2023-09-29T05:06:26.482958Z",
50
+ "shell.execute_reply": "2023-09-29T05:06:26.482054Z"
51
  },
52
  "papermill": {
53
+ "duration": 0.751859,
54
+ "end_time": "2023-09-29T05:06:26.485032",
55
  "exception": false,
56
+ "start_time": "2023-09-29T05:06:25.733173",
57
  "status": "completed"
58
  },
59
  "tags": []
 
69
  {
70
  "cell_type": "code",
71
  "execution_count": 2,
72
+ "id": "42d56a7f",
73
  "metadata": {
74
  "execution": {
75
+ "iopub.execute_input": "2023-09-29T05:06:26.491452Z",
76
+ "iopub.status.busy": "2023-09-29T05:06:26.490928Z",
77
+ "iopub.status.idle": "2023-09-29T05:06:26.499148Z",
78
+ "shell.execute_reply": "2023-09-29T05:06:26.498384Z"
79
  },
80
  "papermill": {
81
+ "duration": 0.013307,
82
+ "end_time": "2023-09-29T05:06:26.500768",
83
  "exception": false,
84
+ "start_time": "2023-09-29T05:06:26.487461",
85
  "status": "completed"
86
  },
87
  "tags": []
 
140
  {
141
  "cell_type": "code",
142
  "execution_count": 3,
143
+ "id": "5514ed91",
144
  "metadata": {
145
  "execution": {
146
+ "iopub.execute_input": "2023-09-29T05:06:26.507274Z",
147
+ "iopub.status.busy": "2023-09-29T05:06:26.506786Z",
148
+ "iopub.status.idle": "2023-09-29T05:06:55.991075Z",
149
+ "shell.execute_reply": "2023-09-29T05:06:55.990231Z"
150
  },
151
  "papermill": {
152
+ "duration": 29.490941,
153
+ "end_time": "2023-09-29T05:06:55.994238",
154
  "exception": false,
155
+ "start_time": "2023-09-29T05:06:26.503297",
156
  "status": "completed"
157
  },
158
  "tags": []
 
162
  "name": "stdout",
163
  "output_type": "stream",
164
  "text": [
165
+ "[2023-09-29 05:06:30,625] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
166
+ ]
167
+ },
168
+ {
169
+ "name": "stdout",
170
+ "output_type": "stream",
171
+ "text": [
172
+ "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
173
  ]
174
  },
175
  {
176
  "name": "stdout",
177
  "output_type": "stream",
178
  "text": [
 
179
  "---- Initializing model ----\r\n",
180
  "No of layers: 6\r\n",
181
  "Embedding size: 2048\r\n",
 
234
  "output_type": "stream",
235
  "text": [
236
  "2048 2048 0 blocks.0.ffn.receptance.weight\r\n",
237
+ "2048 7168 0 blocks.0.ffn.value.weight\r\n",
238
+ "2048 2048 1.0 blocks.1.att.gate.weight\r\n"
239
  ]
240
  },
241
  {
242
  "name": "stdout",
243
  "output_type": "stream",
244
  "text": [
245
+ "2048 2048 1.0 blocks.1.att.receptance.weight\r\n"
246
  ]
247
  },
248
  {
249
  "name": "stdout",
250
  "output_type": "stream",
251
  "text": [
252
+ "2048 2048 1.0 blocks.1.att.key.weight\r\n"
253
  ]
254
  },
255
  {
256
  "name": "stdout",
257
  "output_type": "stream",
258
  "text": [
259
+ "2048 2048 1.0 blocks.1.att.value.weight\r\n"
260
  ]
261
  },
262
  {
263
  "name": "stdout",
264
  "output_type": "stream",
265
  "text": [
266
+ "2048 2048 0 blocks.1.att.output.weight\r\n"
267
  ]
268
  },
269
  {
270
  "name": "stdout",
271
  "output_type": "stream",
272
  "text": [
 
273
  "7168 2048 1.0 blocks.1.ffn.key.weight\r\n"
274
  ]
275
  },
 
278
  "output_type": "stream",
279
  "text": [
280
  "2048 2048 0 blocks.1.ffn.receptance.weight\r\n",
281
+ "2048 7168 0 blocks.1.ffn.value.weight\r\n"
 
282
  ]
283
  },
284
  {
285
  "name": "stdout",
286
  "output_type": "stream",
287
  "text": [
288
+ "2048 2048 1.0 blocks.2.att.gate.weight\r\n"
289
  ]
290
  },
291
  {
292
  "name": "stdout",
293
  "output_type": "stream",
294
  "text": [
295
+ "2048 2048 1.0 blocks.2.att.receptance.weight\r\n"
296
  ]
297
  },
298
  {
299
  "name": "stdout",
300
  "output_type": "stream",
301
  "text": [
302
+ "2048 2048 1.0 blocks.2.att.key.weight\r\n"
303
  ]
304
  },
305
  {
306
  "name": "stdout",
307
  "output_type": "stream",
308
  "text": [
309
+ "2048 2048 1.0 blocks.2.att.value.weight\r\n"
310
  ]
311
  },
312
  {
313
  "name": "stdout",
314
  "output_type": "stream",
315
  "text": [
316
+ "2048 2048 0 blocks.2.att.output.weight\r\n",
317
  "7168 2048 1.0 blocks.2.ffn.key.weight\r\n"
318
  ]
319
  },
 
366
  "output_type": "stream",
367
  "text": [
368
  "2048 2048 0 blocks.3.ffn.receptance.weight\r\n",
369
+ "2048 7168 0 blocks.3.ffn.value.weight\r\n",
 
 
 
 
 
 
370
  "2048 2048 1.0 blocks.4.att.gate.weight\r\n"
371
  ]
372
  },
 
404
  "output_type": "stream",
405
  "text": [
406
  "2048 2048 0 blocks.4.ffn.receptance.weight\r\n",
407
+ "2048 7168 0 blocks.4.ffn.value.weight\r\n",
 
 
 
 
 
 
408
  "2048 2048 1.0 blocks.5.att.gate.weight\r\n"
409
  ]
410
  },
 
433
  "name": "stdout",
434
  "output_type": "stream",
435
  "text": [
436
+ "2048 2048 0 blocks.5.att.output.weight\r\n",
 
 
 
 
 
 
437
  "7168 2048 1.0 blocks.5.ffn.key.weight\r\n"
438
  ]
439
  },
 
459
  },
460
  {
461
  "cell_type": "markdown",
462
+ "id": "8afd9e50",
463
  "metadata": {
464
  "papermill": {
465
+ "duration": 0.005752,
466
+ "end_time": "2023-09-29T05:06:56.006385",
467
  "exception": false,
468
+ "start_time": "2023-09-29T05:06:56.000633",
469
  "status": "completed"
470
  },
471
  "tags": []
 
477
  {
478
  "cell_type": "code",
479
  "execution_count": 4,
480
+ "id": "ff78d2bd",
481
  "metadata": {
482
  "execution": {
483
+ "iopub.execute_input": "2023-09-29T05:06:56.020959Z",
484
+ "iopub.status.busy": "2023-09-29T05:06:56.020447Z",
485
+ "iopub.status.idle": "2023-09-29T05:07:01.579575Z",
486
+ "shell.execute_reply": "2023-09-29T05:07:01.578476Z"
487
  },
488
  "papermill": {
489
+ "duration": 5.569483,
490
+ "end_time": "2023-09-29T05:07:01.582319",
491
  "exception": false,
492
+ "start_time": "2023-09-29T05:06:56.012836",
493
  "status": "completed"
494
  },
495
  "tags": []
 
515
  {
516
  "cell_type": "code",
517
  "execution_count": 5,
518
+ "id": "f656d56b",
519
  "metadata": {
520
  "execution": {
521
+ "iopub.execute_input": "2023-09-29T05:07:01.598719Z",
522
+ "iopub.status.busy": "2023-09-29T05:07:01.597947Z",
523
+ "iopub.status.idle": "2023-09-29T05:07:01.851778Z",
524
+ "shell.execute_reply": "2023-09-29T05:07:01.850738Z"
525
  },
526
  "papermill": {
527
+ "duration": 0.265316,
528
+ "end_time": "2023-09-29T05:07:01.854564",
529
  "exception": false,
530
+ "start_time": "2023-09-29T05:07:01.589248",
531
  "status": "completed"
532
  },
533
  "tags": []
 
537
  "name": "stdout",
538
  "output_type": "stream",
539
  "text": [
540
+ "/usr/bin/sh: 1: python: not found\r\n"
541
  ]
542
  }
543
  ],
544
  "source": [
545
  "# Start the foundation model training\n",
546
  "!cd \"{TRAINER_DIR}\" && \\\n",
 
547
  " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
548
  " python lightning_trainer.py fit \\\n",
549
+ " -c \"{NOTEBOOK_DIR}/v5base-enwiki-4k-part1.yaml\" \\\n",
550
  " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Enwiki-4k Part 1 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
551
  " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
552
  " --trainer.devices=\"{GPU_DEVICES}\" \\\n",
 
559
  {
560
  "cell_type": "code",
561
  "execution_count": 6,
562
+ "id": "c7b46f94",
563
  "metadata": {
564
  "execution": {
565
+ "iopub.execute_input": "2023-09-29T05:07:01.871225Z",
566
+ "iopub.status.busy": "2023-09-29T05:07:01.870345Z",
567
+ "iopub.status.idle": "2023-09-29T05:07:02.373808Z",
568
+ "shell.execute_reply": "2023-09-29T05:07:02.372753Z"
569
  },
570
  "papermill": {
571
+ "duration": 0.51526,
572
+ "end_time": "2023-09-29T05:07:02.376685",
573
  "exception": false,
574
+ "start_time": "2023-09-29T05:07:01.861425",
575
  "status": "completed"
576
  },
577
  "tags": []
 
602
  {
603
  "cell_type": "code",
604
  "execution_count": 7,
605
+ "id": "9f558c57",
606
  "metadata": {
607
  "execution": {
608
+ "iopub.execute_input": "2023-09-29T05:07:02.393471Z",
609
+ "iopub.status.busy": "2023-09-29T05:07:02.392695Z",
610
+ "iopub.status.idle": "2023-09-29T05:07:08.804315Z",
611
+ "shell.execute_reply": "2023-09-29T05:07:08.803244Z"
612
  },
613
  "papermill": {
614
+ "duration": 6.42299,
615
+ "end_time": "2023-09-29T05:07:08.806769",
616
  "exception": false,
617
+ "start_time": "2023-09-29T05:07:02.383779",
618
  "status": "completed"
619
  },
620
  "tags": []
 
624
  "name": "stdout",
625
  "output_type": "stream",
626
  "text": [
627
+ "[2023-09-29 05:07:06,749] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
628
+ ]
629
+ },
630
+ {
631
+ "name": "stdout",
632
+ "output_type": "stream",
633
+ "text": [
634
+ "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
635
+ "Traceback (most recent call last):\r\n",
636
+ " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in <module>\r\n",
637
+ " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n",
638
+ " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n",
639
+ " self.model = RWKV(**model_config)\r\n",
640
+ " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n",
641
+ " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
642
+ "ValueError: load_model file '../model/v5-L6-D2048-E0_01-enwiki-4k-p1.pth' does not exist\r\n"
643
  ]
644
  }
645
  ],
646
  "source": [
647
  "# # Lets do a quick dragon prompt validation\n",
648
  "!cd \"{INFERENCE_DIR}\" && \\\n",
 
649
  " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-enwiki-4k-p1.pth\" \"cuda fp32\""
650
  ]
651
  }
 
670
  },
671
  "papermill": {
672
  "default_parameters": {},
673
+ "duration": 44.644446,
674
+ "end_time": "2023-09-29T05:07:09.133994",
675
  "environment_variables": {},
676
  "exception": null,
677
  "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb",
678
  "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb",
679
  "parameters": {},
680
+ "start_time": "2023-09-29T05:06:24.489548",
681
  "version": "2.4.0"
682
  }
683
  },