Rodrigo1771 committed on
Commit
988fd5d
1 Parent(s): 5109da0

End of training

Browse files
README.md CHANGED
@@ -2,9 +2,10 @@
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - distemist-ner
8
  metrics:
9
  - precision
10
  - recall
@@ -17,24 +18,24 @@ model-index:
17
  name: Token Classification
18
  type: token-classification
19
  dataset:
20
- name: distemist-ner
21
- type: distemist-ner
22
  config: DisTEMIST NER
23
  split: validation
24
  args: DisTEMIST NER
25
  metrics:
26
  - name: Precision
27
  type: precision
28
- value: 0.7882031427920747
29
  - name: Recall
30
  type: recall
31
- value: 0.8097800655124006
32
  - name: F1
33
  type: f1
34
- value: 0.7988459319099828
35
  - name: Accuracy
36
  type: accuracy
37
- value: 0.9766776058330014
38
  ---
39
 
40
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -42,13 +43,13 @@ should probably proofread and complete it, then remove this comment. -->
42
 
43
  # output
44
 
45
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the distemist-ner dataset.
46
  It achieves the following results on the evaluation set:
47
- - Loss: 0.1367
48
- - Precision: 0.7882
49
- - Recall: 0.8098
50
- - F1: 0.7988
51
- - Accuracy: 0.9767
52
 
53
  ## Model description
54
 
 
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
5
+ - token-classification
6
  - generated_from_trainer
7
  datasets:
8
+ - Rodrigo1771/distemist-ner
9
  metrics:
10
  - precision
11
  - recall
 
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
+ name: Rodrigo1771/distemist-ner
22
+ type: Rodrigo1771/distemist-ner
23
  config: DisTEMIST NER
24
  split: validation
25
  args: DisTEMIST NER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
+ value: 0.7938948817994033
30
  - name: Recall
31
  type: recall
32
+ value: 0.8093121197941039
33
  - name: F1
34
  type: f1
35
+ value: 0.8015293708724366
36
  - name: Accuracy
37
  type: accuracy
38
+ value: 0.9767668584453568
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
43
 
44
  # output
45
 
46
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/distemist-ner dataset.
47
  It achieves the following results on the evaluation set:
48
+ - Loss: 0.1294
49
+ - Precision: 0.7939
50
+ - Recall: 0.8093
51
+ - F1: 0.8015
52
+ - Accuracy: 0.9768
53
 
54
  ## Model description
55
 
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
  "epoch": 9.988249118683902,
3
- "eval_accuracy": 0.7932840841995413,
4
- "eval_f1": 0.17333222008850296,
5
- "eval_loss": 1.0502684116363525,
6
- "eval_precision": 0.09532555790247038,
7
- "eval_recall": 0.9540441176470589,
8
- "eval_runtime": 14.6851,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 463.735,
11
- "eval_steps_per_second": 58.018,
12
- "predict_accuracy": 0.8808544760053879,
13
- "predict_f1": 0.24078298623886968,
14
- "predict_loss": 0.62894606590271,
15
- "predict_precision": 0.1378032345013477,
16
- "predict_recall": 0.9528246942341293,
17
- "predict_runtime": 29.8715,
18
- "predict_samples_per_second": 489.229,
19
- "predict_steps_per_second": 61.162,
20
- "total_flos": 1.2649810588547778e+16,
21
- "train_loss": 0.10639642311544979,
22
- "train_runtime": 1208.2019,
23
  "train_samples": 27229,
24
- "train_samples_per_second": 225.368,
25
- "train_steps_per_second": 3.518
26
  }
 
1
  {
2
  "epoch": 9.988249118683902,
3
+ "eval_accuracy": 0.9767668584453568,
4
+ "eval_f1": 0.8015293708724366,
5
+ "eval_loss": 0.1294233798980713,
6
+ "eval_precision": 0.7938948817994033,
7
+ "eval_recall": 0.8093121197941039,
8
+ "eval_runtime": 14.4843,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 470.164,
11
+ "eval_steps_per_second": 58.822,
12
+ "predict_accuracy": 0.9765722208397027,
13
+ "predict_f1": 0.8011714125490684,
14
+ "predict_loss": 0.12331698834896088,
15
+ "predict_precision": 0.7871923594955308,
16
+ "predict_recall": 0.8156559248921593,
17
+ "predict_runtime": 29.3641,
18
+ "predict_samples_per_second": 497.683,
19
+ "predict_steps_per_second": 62.219,
20
+ "total_flos": 1.2649124434987926e+16,
21
+ "train_loss": 0.02590363489880281,
22
+ "train_runtime": 1203.9865,
23
  "train_samples": 27229,
24
+ "train_samples_per_second": 226.157,
25
+ "train_steps_per_second": 3.53
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 9.988249118683902,
3
- "eval_accuracy": 0.7932840841995413,
4
- "eval_f1": 0.17333222008850296,
5
- "eval_loss": 1.0502684116363525,
6
- "eval_precision": 0.09532555790247038,
7
- "eval_recall": 0.9540441176470589,
8
- "eval_runtime": 14.6851,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 463.735,
11
- "eval_steps_per_second": 58.018
12
  }
 
1
  {
2
  "epoch": 9.988249118683902,
3
+ "eval_accuracy": 0.9767668584453568,
4
+ "eval_f1": 0.8015293708724366,
5
+ "eval_loss": 0.1294233798980713,
6
+ "eval_precision": 0.7938948817994033,
7
+ "eval_recall": 0.8093121197941039,
8
+ "eval_runtime": 14.4843,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 470.164,
11
+ "eval_steps_per_second": 58.822
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.8808544760053879,
3
- "predict_f1": 0.24078298623886968,
4
- "predict_loss": 0.62894606590271,
5
- "predict_precision": 0.1378032345013477,
6
- "predict_recall": 0.9528246942341293,
7
- "predict_runtime": 29.8715,
8
- "predict_samples_per_second": 489.229,
9
- "predict_steps_per_second": 61.162
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9765722208397027,
3
+ "predict_f1": 0.8011714125490684,
4
+ "predict_loss": 0.12331698834896088,
5
+ "predict_precision": 0.7871923594955308,
6
+ "predict_recall": 0.8156559248921593,
7
+ "predict_runtime": 29.3641,
8
+ "predict_samples_per_second": 497.683,
9
+ "predict_steps_per_second": 62.219
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725050776.6b97e535edda.20735.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:695bf5e0a27bceb5f4eb0bc971d0b2f97725e31e30a7b30d5ffedc4ae0991cc9
3
+ size 560
train.log CHANGED
@@ -1509,3 +1509,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1509
  {'eval_loss': 0.13674204051494598, 'eval_precision': 0.7882031427920747, 'eval_recall': 0.8097800655124006, 'eval_f1': 0.7988459319099828, 'eval_accuracy': 0.9766776058330014, 'eval_runtime': 14.8828, 'eval_samples_per_second': 457.575, 'eval_steps_per_second': 57.247, 'epoch': 9.99}
1510
  {'train_runtime': 1203.9865, 'train_samples_per_second': 226.157, 'train_steps_per_second': 3.53, 'train_loss': 0.02590363489880281, 'epoch': 9.99}
1511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1512
  0%| | 0/852 [00:00<?, ?it/s]
1513
  1%| | 10/852 [00:00<00:09, 91.43it/s]
1514
  2%|▏ | 20/852 [00:00<00:10, 81.20it/s]
1515
  3%|▎ | 29/852 [00:00<00:10, 75.61it/s]
1516
  4%|▍ | 37/852 [00:00<00:10, 76.54it/s]
1517
  5%|▌ | 46/852 [00:00<00:10, 79.06it/s]
1518
  6%|▋ | 55/852 [00:00<00:09, 80.42it/s]
1519
  8%|▊ | 64/852 [00:00<00:09, 80.69it/s]
1520
  9%|▊ | 73/852 [00:00<00:09, 78.84it/s]
1521
  10%|▉ | 81/852 [00:01<00:09, 78.62it/s]
1522
  11%|█ | 90/852 [00:01<00:09, 79.21it/s]
1523
  12%|█▏ | 98/852 [00:01<00:09, 79.05it/s]
1524
  13%|█▎ | 107/852 [00:01<00:09, 78.52it/s]
1525
  13%|█▎ | 115/852 [00:01<00:09, 78.70it/s]
1526
  15%|█▍ | 124/852 [00:01<00:09, 80.68it/s]
1527
  16%|█▌ | 133/852 [00:01<00:09, 76.87it/s]
1528
  17%|█▋ | 141/852 [00:01<00:09, 77.17it/s]
1529
  17%|█▋ | 149/852 [00:01<00:09, 76.66it/s]
1530
  19%|█▊ | 158/852 [00:02<00:08, 78.34it/s]
1531
  20%|█▉ | 167/852 [00:02<00:08, 79.21it/s]
1532
  21%|██ | 175/852 [00:02<00:08, 79.28it/s]
1533
  22%|██▏ | 184/852 [00:02<00:08, 79.85it/s]
1534
  23%|██▎ | 192/852 [00:02<00:08, 77.56it/s]
1535
  24%|██▎ | 201/852 [00:02<00:08, 79.28it/s]
1536
  25%|██▍ | 209/852 [00:02<00:08, 78.02it/s]
1537
  25%|██▌ | 217/852 [00:02<00:08, 78.36it/s]
1538
  26%|██▋ | 225/852 [00:02<00:07, 78.57it/s]
1539
  27%|██▋ | 234/852 [00:02<00:07, 79.89it/s]
1540
  28%|██▊ | 242/852 [00:03<00:07, 76.69it/s]
1541
  29%|██▉ | 250/852 [00:03<00:07, 77.50it/s]
1542
  30%|███ | 259/852 [00:03<00:07, 78.25it/s]
1543
  31%|███▏ | 267/852 [00:03<00:07, 77.79it/s]
1544
  32%|███▏ | 276/852 [00:03<00:07, 79.12it/s]
1545
  33%|███▎ | 285/852 [00:03<00:07, 79.97it/s]
1546
  34%|███▍ | 293/852 [00:03<00:07, 79.57it/s]
1547
  35%|███▌ | 302/852 [00:03<00:06, 80.12it/s]
1548
  37%|███▋ | 311/852 [00:03<00:06, 81.11it/s]
1549
  38%|███▊ | 320/852 [00:04<00:06, 77.46it/s]
1550
  38%|███▊ | 328/852 [00:04<00:06, 76.62it/s]
1551
  39%|███▉ | 336/852 [00:04<00:06, 76.07it/s]
1552
  40%|████ | 344/852 [00:04<00:06, 77.03it/s]
1553
  41%|████▏ | 352/852 [00:04<00:06, 77.09it/s]
1554
  42%|████▏ | 360/852 [00:04<00:06, 75.43it/s]
1555
  43%|████▎ | 368/852 [00:04<00:06, 76.71it/s]
1556
  44%|████▍ | 376/852 [00:04<00:06, 77.23it/s]
1557
  45%|████▌ | 384/852 [00:04<00:06, 76.84it/s]
1558
  46%|████▌ | 392/852 [00:05<00:05, 77.70it/s]
1559
  47%|████▋ | 400/852 [00:05<00:05, 77.92it/s]
1560
  48%|████▊ | 408/852 [00:05<00:05, 76.03it/s]
1561
  49%|████▉ | 417/852 [00:05<00:05, 77.02it/s]
1562
  50%|████▉ | 425/852 [00:05<00:05, 75.06it/s]
1563
  51%|█████ | 434/852 [00:05<00:05, 77.48it/s]
1564
  52%|█████▏ | 443/852 [00:05<00:05, 77.92it/s]
1565
  53%|█████▎ | 452/852 [00:05<00:05, 78.84it/s]
1566
  54%|█████▍ | 460/852 [00:05<00:04, 78.43it/s]
1567
  55%|█████▍ | 468/852 [00:05<00:04, 76.81it/s]
1568
  56%|█████▌ | 476/852 [00:06<00:05, 74.29it/s]
1569
  57%|█████▋ | 484/852 [00:06<00:04, 74.53it/s]
1570
  58%|█████▊ | 493/852 [00:06<00:04, 76.53it/s]
1571
  59%|█████▉ | 502/852 [00:06<00:04, 77.44it/s]
1572
  60%|█████▉ | 510/852 [00:06<00:04, 77.79it/s]
1573
  61%|██████ | 519/852 [00:06<00:04, 79.51it/s]
1574
  62%|██████▏ | 527/852 [00:06<00:04, 77.59it/s]
1575
  63%|██████▎ | 536/852 [00:06<00:03, 79.53it/s]
1576
  64%|██████▍ | 545/852 [00:06<00:03, 80.00it/s]
1577
  65%|██████▌ | 554/852 [00:07<00:03, 77.58it/s]
1578
  66%|██████▌ | 562/852 [00:07<00:03, 76.79it/s]
1579
  67%|██████▋ | 571/852 [00:07<00:03, 78.19it/s]
1580
  68%|██████▊ | 579/852 [00:07<00:03, 76.97it/s]
1581
  69%|██████▉ | 587/852 [00:07<00:03, 76.55it/s]
1582
  70%|██████▉ | 596/852 [00:07<00:03, 78.37it/s]
1583
  71%|███████ | 604/852 [00:07<00:03, 78.38it/s]
1584
  72%|███████▏ | 612/852 [00:07<00:03, 77.21it/s]
1585
  73%|███████▎ | 620/852 [00:07<00:02, 77.48it/s]
1586
  74%|███████▎ | 628/852 [00:08<00:02, 76.66it/s]
1587
  75%|███████▍ | 636/852 [00:08<00:02, 76.28it/s]
1588
  76%|███████▌ | 644/852 [00:08<00:02, 70.79it/s]
1589
  77%|███████▋ | 652/852 [00:08<00:02, 71.23it/s]
1590
  78%|███████▊ | 661/852 [00:08<00:02, 74.26it/s]
1591
  79%|███████▊ | 669/852 [00:08<00:02, 74.79it/s]
1592
  79%|███████▉ | 677/852 [00:08<00:02, 75.78it/s]
1593
  81%|████████ | 686/852 [00:08<00:02, 77.22it/s]
1594
  82%|████████▏ | 695/852 [00:08<00:01, 78.85it/s]
1595
  83%|████████▎ | 704/852 [00:09<00:01, 80.60it/s]
1596
  84%|████████▎ | 713/852 [00:09<00:01, 81.65it/s]
1597
  85%|████████▍ | 722/852 [00:09<00:01, 80.17it/s]
1598
  86%|████████▌ | 731/852 [00:09<00:01, 79.99it/s]
1599
  87%|████████▋ | 740/852 [00:09<00:01, 80.97it/s]
1600
  88%|████████▊ | 749/852 [00:09<00:01, 81.10it/s]
1601
  89%|████████▉ | 758/852 [00:09<00:01, 81.97it/s]
1602
  90%|█████████ | 767/852 [00:09<00:01, 80.55it/s]
1603
  91%|█████████ | 776/852 [00:09<00:00, 80.68it/s]
1604
  92%|█████████▏| 785/852 [00:10<00:00, 79.64it/s]
1605
  93%|█████████▎| 793/852 [00:10<00:00, 79.71it/s]
1606
  94%|█████████▍| 801/852 [00:10<00:00, 79.62it/s]
1607
  95%|█████████▍| 809/852 [00:10<00:00, 77.20it/s]
1608
  96%|█████████▌| 817/852 [00:10<00:00, 76.64it/s]
1609
  97%|█████████▋| 826/852 [00:10<00:00, 78.45it/s]
1610
  98%|█████████▊| 835/852 [00:10<00:00, 79.43it/s]
1611
  99%|█████████▉| 843/852 [00:10<00:00, 78.34it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1612
  0%| | 0/1827 [00:00<?, ?it/s]
1613
  1%| | 10/1827 [00:00<00:20, 88.35it/s]
1614
  1%| | 19/1827 [00:00<00:23, 78.49it/s]
1615
  1%|▏ | 27/1827 [00:00<00:23, 77.09it/s]
1616
  2%|▏ | 35/1827 [00:00<00:23, 76.98it/s]
1617
  2%|▏ | 44/1827 [00:00<00:22, 78.90it/s]
1618
  3%|▎ | 52/1827 [00:00<00:22, 77.56it/s]
1619
  3%|▎ | 60/1827 [00:00<00:23, 75.36it/s]
1620
  4%|▎ | 68/1827 [00:00<00:23, 75.95it/s]
1621
  4%|▍ | 76/1827 [00:00<00:22, 76.90it/s]
1622
  5%|▍ | 84/1827 [00:01<00:22, 77.57it/s]
1623
  5%|▌ | 92/1827 [00:01<00:22, 78.03it/s]
1624
  5%|▌ | 100/1827 [00:01<00:22, 77.44it/s]
1625
  6%|▌ | 109/1827 [00:01<00:21, 79.23it/s]
1626
  6%|▋ | 117/1827 [00:01<00:21, 77.97it/s]
1627
  7%|▋ | 125/1827 [00:01<00:21, 77.49it/s]
1628
  7%|▋ | 134/1827 [00:01<00:21, 78.51it/s]
1629
  8%|▊ | 143/1827 [00:01<00:21, 80.08it/s]
1630
  8%|▊ | 152/1827 [00:01<00:20, 79.98it/s]
1631
  9%|▉ | 160/1827 [00:02<00:21, 76.81it/s]
1632
  9%|▉ | 169/1827 [00:02<00:21, 78.19it/s]
1633
  10%|▉ | 178/1827 [00:02<00:20, 80.05it/s]
1634
  10%|█ | 187/1827 [00:02<00:20, 81.05it/s]
1635
  11%|█ | 196/1827 [00:02<00:20, 80.06it/s]
1636
  11%|█ | 205/1827 [00:02<00:20, 79.21it/s]
1637
  12%|█▏ | 214/1827 [00:02<00:20, 78.84it/s]
1638
  12%|█▏ | 222/1827 [00:02<00:20, 78.64it/s]
1639
  13%|█▎ | 230/1827 [00:02<00:20, 77.74it/s]
1640
  13%|█▎ | 238/1827 [00:03<00:20, 76.28it/s]
1641
  14%|█▎ | 247/1827 [00:03<00:20, 78.47it/s]
1642
  14%|█▍ | 256/1827 [00:03<00:19, 79.66it/s]
1643
  14%|█▍ | 264/1827 [00:03<00:19, 79.04it/s]
1644
  15%|█▍ | 273/1827 [00:03<00:19, 80.83it/s]
1645
  15%|█▌ | 282/1827 [00:03<00:18, 81.79it/s]
1646
  16%|█▌ | 291/1827 [00:03<00:19, 80.01it/s]
1647
  16%|█▋ | 300/1827 [00:03<00:18, 80.85it/s]
1648
  17%|█▋ | 309/1827 [00:03<00:19, 79.16it/s]
1649
  17%|█▋ | 317/1827 [00:04<00:19, 79.20it/s]
1650
  18%|█▊ | 326/1827 [00:04<00:18, 80.90it/s]
1651
  18%|█▊ | 335/1827 [00:04<00:18, 80.27it/s]
1652
  19%|█▉ | 344/1827 [00:04<00:18, 81.58it/s]
1653
  19%|█▉ | 353/1827 [00:04<00:19, 76.68it/s]
1654
  20%|█▉ | 361/1827 [00:04<00:18, 77.41it/s]
1655
  20%|██ | 370/1827 [00:04<00:18, 78.63it/s]
1656
  21%|██ | 379/1827 [00:04<00:18, 79.94it/s]
1657
  21%|██ | 388/1827 [00:04<00:18, 79.31it/s]
1658
  22%|██▏ | 396/1827 [00:05<00:18, 78.99it/s]
1659
  22%|██▏ | 405/1827 [00:05<00:17, 80.86it/s]
1660
  23%|██▎ | 414/1827 [00:05<00:17, 79.00it/s]
1661
  23%|██▎ | 423/1827 [00:05<00:17, 80.11it/s]
1662
  24%|██▎ | 432/1827 [00:05<00:17, 79.18it/s]
1663
  24%|██▍ | 440/1827 [00:05<00:17, 79.33it/s]
1664
  25%|██▍ | 448/1827 [00:05<00:17, 79.41it/s]
1665
  25%|██▌ | 457/1827 [00:05<00:17, 80.52it/s]
1666
  26%|██▌ | 466/1827 [00:05<00:16, 80.24it/s]
1667
  26%|██▌ | 475/1827 [00:06<00:17, 78.66it/s]
1668
  26%|██▋ | 483/1827 [00:06<00:17, 78.64it/s]
1669
  27%|██▋ | 491/1827 [00:06<00:17, 77.35it/s]
1670
  27%|██▋ | 500/1827 [00:06<00:16, 78.45it/s]
1671
  28%|██▊ | 508/1827 [00:06<00:16, 78.32it/s]
1672
  28%|██▊ | 516/1827 [00:06<00:16, 78.75it/s]
1673
  29%|██▊ | 525/1827 [00:06<00:16, 78.79it/s]
1674
  29%|██▉ | 533/1827 [00:06<00:16, 78.81it/s]
1675
  30%|██▉ | 541/1827 [00:06<00:16, 79.14it/s]
1676
  30%|███ | 550/1827 [00:06<00:16, 79.35it/s]
1677
  31%|███ | 559/1827 [00:07<00:16, 79.12it/s]
1678
  31%|███ | 568/1827 [00:07<00:15, 81.14it/s]
1679
  32%|███▏ | 577/1827 [00:07<00:15, 82.20it/s]
1680
  32%|███▏ | 586/1827 [00:07<00:15, 81.32it/s]
1681
  33%|███▎ | 595/1827 [00:07<00:15, 81.45it/s]
1682
  33%|███▎ | 604/1827 [00:07<00:14, 81.95it/s]
1683
  34%|███▎ | 613/1827 [00:07<00:15, 80.89it/s]
1684
  34%|███▍ | 622/1827 [00:07<00:14, 80.81it/s]
1685
  35%|███▍ | 631/1827 [00:07<00:14, 82.11it/s]
1686
  35%|███▌ | 640/1827 [00:08<00:14, 82.74it/s]
1687
  36%|███▌ | 649/1827 [00:08<00:14, 80.60it/s]
1688
  36%|███▌ | 658/1827 [00:08<00:14, 78.56it/s]
1689
  37%|███▋ | 667/1827 [00:08<00:14, 80.49it/s]
1690
  37%|███▋ | 676/1827 [00:08<00:14, 80.58it/s]
1691
  37%|███▋ | 685/1827 [00:08<00:13, 81.73it/s]
1692
  38%|███▊ | 694/1827 [00:08<00:13, 82.81it/s]
1693
  38%|███▊ | 703/1827 [00:08<00:13, 81.28it/s]
1694
  39%|███▉ | 712/1827 [00:08<00:13, 82.12it/s]
1695
  39%|███▉ | 721/1827 [00:09<00:13, 83.16it/s]
1696
  40%|███▉ | 730/1827 [00:09<00:13, 83.47it/s]
1697
  40%|████ | 739/1827 [00:09<00:12, 83.79it/s]
1698
  41%|████ | 748/1827 [00:09<00:12, 84.99it/s]
1699
  41%|████▏ | 757/1827 [00:09<00:12, 85.15it/s]
1700
  42%|████▏ | 766/1827 [00:09<00:12, 85.06it/s]
1701
  42%|████▏ | 775/1827 [00:09<00:12, 85.29it/s]
1702
  43%|████▎ | 784/1827 [00:09<00:12, 81.63it/s]
1703
  43%|████▎ | 793/1827 [00:09<00:12, 82.24it/s]
1704
  44%|████▍ | 802/1827 [00:10<00:12, 81.57it/s]
1705
  44%|████▍ | 811/1827 [00:10<00:12, 82.68it/s]
1706
  45%|████▍ | 820/1827 [00:10<00:12, 82.99it/s]
1707
  45%|████▌ | 829/1827 [00:10<00:11, 83.82it/s]
1708
  46%|████▌ | 838/1827 [00:10<00:12, 82.25it/s]
1709
  46%|████▋ | 847/1827 [00:10<00:11, 83.20it/s]
1710
  47%|████▋ | 856/1827 [00:10<00:11, 84.21it/s]
1711
  47%|████▋ | 865/1827 [00:10<00:11, 84.27it/s]
1712
  48%|████▊ | 874/1827 [00:10<00:11, 80.96it/s]
1713
  48%|████▊ | 883/1827 [00:11<00:11, 81.30it/s]
1714
  49%|████▉ | 892/1827 [00:11<00:11, 80.78it/s]
1715
  49%|████▉ | 901/1827 [00:11<00:11, 82.19it/s]
1716
  50%|████▉ | 910/1827 [00:11<00:11, 79.40it/s]
1717
  50%|█████ | 919/1827 [00:11<00:11, 79.95it/s]
1718
  51%|█████ | 928/1827 [00:11<00:11, 80.89it/s]
1719
  51%|█████▏ | 937/1827 [00:11<00:11, 79.25it/s]
1720
  52%|█████▏ | 945/1827 [00:11<00:11, 79.00it/s]
1721
  52%|█████▏ | 953/1827 [00:11<00:11, 77.83it/s]
1722
  53%|█████▎ | 962/1827 [00:12<00:10, 78.70it/s]
1723
  53%|█████▎ | 971/1827 [00:12<00:10, 80.17it/s]
1724
  54%|█████▎ | 980/1827 [00:12<00:10, 79.85it/s]
1725
  54%|█████▍ | 989/1827 [00:12<00:10, 81.39it/s]
1726
  55%|█████▍ | 998/1827 [00:12<00:10, 82.20it/s]
1727
  55%|█████▌ | 1007/1827 [00:12<00:10, 81.27it/s]
1728
  56%|█████▌ | 1016/1827 [00:12<00:10, 80.65it/s]
1729
  56%|█████▌ | 1025/1827 [00:12<00:09, 81.53it/s]
1730
  57%|█████▋ | 1034/1827 [00:12<00:09, 79.74it/s]
1731
  57%|█████▋ | 1043/1827 [00:13<00:09, 80.67it/s]
1732
  58%|█████▊ | 1052/1827 [00:13<00:09, 81.68it/s]
1733
  58%|█████▊ | 1061/1827 [00:13<00:09, 82.32it/s]
1734
  59%|█████▊ | 1070/1827 [00:13<00:09, 83.48it/s]
1735
  59%|█████▉ | 1079/1827 [00:13<00:08, 83.94it/s]
1736
  60%|█████▉ | 1088/1827 [00:13<00:08, 83.86it/s]
1737
  60%|██████ | 1097/1827 [00:13<00:08, 83.97it/s]
1738
  61%|██████ | 1106/1827 [00:13<00:08, 83.02it/s]
1739
  61%|██████ | 1115/1827 [00:13<00:08, 81.44it/s]
1740
  62%|██████▏ | 1124/1827 [00:13<00:08, 81.63it/s]
1741
  62%|██████▏ | 1133/1827 [00:14<00:08, 81.21it/s]
1742
  63%|██████▎ | 1142/1827 [00:14<00:08, 82.28it/s]
1743
  63%|██████▎ | 1151/1827 [00:14<00:08, 83.08it/s]
1744
  63%|██████▎ | 1160/1827 [00:14<00:08, 81.13it/s]
1745
  64%|██████▍ | 1169/1827 [00:14<00:08, 81.26it/s]
1746
  64%|██████▍ | 1178/1827 [00:14<00:07, 82.06it/s]
1747
  65%|██████▍ | 1187/1827 [00:14<00:08, 77.70it/s]
1748
  65%|██████▌ | 1196/1827 [00:14<00:08, 78.65it/s]
1749
  66%|██████▌ | 1205/1827 [00:14<00:07, 80.19it/s]
1750
  66%|██████▋ | 1214/1827 [00:15<00:07, 78.78it/s]
1751
  67%|██████▋ | 1223/1827 [00:15<00:07, 79.88it/s]
1752
  67%|██████▋ | 1232/1827 [00:15<00:07, 81.80it/s]
1753
  68%|██████▊ | 1241/1827 [00:15<00:07, 82.09it/s]
1754
  68%|██████▊ | 1250/1827 [00:15<00:06, 82.44it/s]
1755
  69%|██████▉ | 1259/1827 [00:15<00:07, 80.44it/s]
1756
  69%|██████▉ | 1268/1827 [00:15<00:06, 80.68it/s]
1757
  70%|██████▉ | 1277/1827 [00:15<00:06, 80.44it/s]
1758
  70%|███████ | 1286/1827 [00:15<00:06, 81.54it/s]
1759
  71%|███████ | 1295/1827 [00:16<00:06, 82.61it/s]
1760
  71%|███████▏ | 1304/1827 [00:16<00:06, 83.55it/s]
1761
  72%|███████▏ | 1313/1827 [00:16<00:06, 83.36it/s]
1762
  72%|███████▏ | 1322/1827 [00:16<00:06, 83.66it/s]
1763
  73%|███████▎ | 1331/1827 [00:16<00:05, 84.22it/s]
1764
  73%|███████▎ | 1340/1827 [00:16<00:05, 84.53it/s]
1765
  74%|███████▍ | 1349/1827 [00:16<00:05, 81.75it/s]
1766
  74%|███████▍ | 1358/1827 [00:16<00:05, 80.15it/s]
1767
  75%|███████▍ | 1367/1827 [00:16<00:05, 80.04it/s]
1768
  75%|███████▌ | 1376/1827 [00:17<00:05, 80.07it/s]
1769
  76%|███████▌ | 1385/1827 [00:17<00:05, 81.10it/s]
1770
  76%|███████▋ | 1394/1827 [00:17<00:05, 80.87it/s]
1771
  77%|███████▋ | 1403/1827 [00:17<00:05, 82.04it/s]
1772
  77%|███████▋ | 1412/1827 [00:17<00:05, 82.83it/s]
1773
  78%|███████▊ | 1421/1827 [00:17<00:04, 82.88it/s]
1774
  78%|███████▊ | 1430/1827 [00:17<00:04, 81.23it/s]
1775
  79%|███████▉ | 1439/1827 [00:17<00:04, 79.21it/s]
1776
  79%|███████▉ | 1448/1827 [00:17<00:04, 80.64it/s]
1777
  80%|███████▉ | 1457/1827 [00:18<00:04, 82.29it/s]
1778
  80%|████████ | 1466/1827 [00:18<00:04, 83.72it/s]
1779
  81%|████████ | 1475/1827 [00:18<00:04, 80.74it/s]
1780
  81%|████████ | 1484/1827 [00:18<00:04, 81.51it/s]
1781
  82%|████████▏ | 1493/1827 [00:18<00:04, 79.63it/s]
1782
  82%|████████▏ | 1501/1827 [00:18<00:04, 79.30it/s]
1783
  83%|████████▎ | 1510/1827 [00:18<00:03, 79.91it/s]
1784
  83%|████████▎ | 1519/1827 [00:18<00:03, 80.55it/s]
1785
  84%|████████▎ | 1528/1827 [00:18<00:03, 79.14it/s]
1786
  84%|████████▍ | 1537/1827 [00:19<00:03, 79.70it/s]
1787
  85%|████████▍ | 1546/1827 [00:19<00:03, 80.07it/s]
1788
  85%|████████▌ | 1555/1827 [00:19<00:03, 80.71it/s]
1789
  86%|████████▌ | 1564/1827 [00:19<00:03, 80.13it/s]
1790
  86%|████████▌ | 1573/1827 [00:19<00:03, 80.19it/s]
1791
  87%|████████▋ | 1582/1827 [00:19<00:03, 81.00it/s]
1792
  87%|████████▋ | 1591/1827 [00:19<00:02, 80.84it/s]
1793
  88%|████████▊ | 1600/1827 [00:19<00:02, 80.45it/s]
1794
  88%|████████▊ | 1609/1827 [00:19<00:02, 80.95it/s]
1795
  89%|████████▊ | 1618/1827 [00:20<00:02, 77.39it/s]
1796
  89%|████████▉ | 1626/1827 [00:20<00:02, 75.05it/s]
1797
  89%|████████▉ | 1634/1827 [00:20<00:02, 76.14it/s]
1798
  90%|████████▉ | 1642/1827 [00:20<00:02, 77.16it/s]
1799
  90%|█████████ | 1651/1827 [00:20<00:02, 79.24it/s]
1800
  91%|█████████ | 1660/1827 [00:20<00:02, 79.60it/s]
1801
  91%|█████████▏| 1668/1827 [00:20<00:02, 78.92it/s]
1802
  92%|█████████▏| 1676/1827 [00:20<00:01, 79.10it/s]
1803
  92%|█████████▏| 1684/1827 [00:20<00:01, 78.48it/s]
1804
  93%|█████████▎| 1692/1827 [00:21<00:01, 74.95it/s]
1805
  93%|█████████▎| 1701/1827 [00:21<00:01, 76.99it/s]
1806
  94%|█████████▎| 1710/1827 [00:21<00:01, 78.97it/s]
1807
  94%|█████████▍| 1719/1827 [00:21<00:01, 77.08it/s]
1808
  95%|█████████▍| 1727/1827 [00:21<00:01, 76.46it/s]
1809
  95%|█████████▌| 1736/1827 [00:21<00:01, 77.82it/s]
1810
  96%|█████████▌| 1745/1827 [00:21<00:01, 79.51it/s]
1811
  96%|█████████▌| 1754/1827 [00:21<00:00, 80.76it/s]
1812
  96%|█████████▋| 1763/1827 [00:21<00:00, 80.67it/s]
1813
  97%|█████████▋| 1772/1827 [00:22<00:00, 81.04it/s]
1814
  97%|█████████▋| 1781/1827 [00:22<00:00, 82.22it/s]
1815
  98%|█████████▊| 1790/1827 [00:22<00:00, 82.53it/s]
1816
  98%|█████████▊| 1799/1827 [00:22<00:00, 78.84it/s]
1817
  99%|█████████▉| 1808/1827 [00:22<00:00, 80.29it/s]
1818
  99%|█████████▉| 1817/1827 [00:22<00:00, 81.17it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1509
  {'eval_loss': 0.13674204051494598, 'eval_precision': 0.7882031427920747, 'eval_recall': 0.8097800655124006, 'eval_f1': 0.7988459319099828, 'eval_accuracy': 0.9766776058330014, 'eval_runtime': 14.8828, 'eval_samples_per_second': 457.575, 'eval_steps_per_second': 57.247, 'epoch': 9.99}
1510
  {'train_runtime': 1203.9865, 'train_samples_per_second': 226.157, 'train_steps_per_second': 3.53, 'train_loss': 0.02590363489880281, 'epoch': 9.99}
1511
 
1512
+ ***** train metrics *****
1513
+ epoch = 9.9882
1514
+ total_flos = 11780415GF
1515
+ train_loss = 0.0259
1516
+ train_runtime = 0:20:03.98
1517
+ train_samples = 27229
1518
+ train_samples_per_second = 226.157
1519
+ train_steps_per_second = 3.53
1520
+ 08/30/2024 20:46:02 - INFO - __main__ - *** Evaluate ***
1521
+ [INFO|trainer.py:805] 2024-08-30 20:46:02,283 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1522
+ [INFO|trainer.py:3788] 2024-08-30 20:46:02,286 >>
1523
+ ***** Running Evaluation *****
1524
+ [INFO|trainer.py:3790] 2024-08-30 20:46:02,286 >> Num examples = 6810
1525
+ [INFO|trainer.py:3793] 2024-08-30 20:46:02,286 >> Batch size = 8
1526
+
1527
  0%| | 0/852 [00:00<?, ?it/s]
1528
  1%| | 10/852 [00:00<00:09, 91.43it/s]
1529
  2%|▏ | 20/852 [00:00<00:10, 81.20it/s]
1530
  3%|▎ | 29/852 [00:00<00:10, 75.61it/s]
1531
  4%|▍ | 37/852 [00:00<00:10, 76.54it/s]
1532
  5%|▌ | 46/852 [00:00<00:10, 79.06it/s]
1533
  6%|▋ | 55/852 [00:00<00:09, 80.42it/s]
1534
  8%|▊ | 64/852 [00:00<00:09, 80.69it/s]
1535
  9%|▊ | 73/852 [00:00<00:09, 78.84it/s]
1536
  10%|▉ | 81/852 [00:01<00:09, 78.62it/s]
1537
  11%|█ | 90/852 [00:01<00:09, 79.21it/s]
1538
  12%|█▏ | 98/852 [00:01<00:09, 79.05it/s]
1539
  13%|█▎ | 107/852 [00:01<00:09, 78.52it/s]
1540
  13%|█▎ | 115/852 [00:01<00:09, 78.70it/s]
1541
  15%|█▍ | 124/852 [00:01<00:09, 80.68it/s]
1542
  16%|█▌ | 133/852 [00:01<00:09, 76.87it/s]
1543
  17%|█▋ | 141/852 [00:01<00:09, 77.17it/s]
1544
  17%|█▋ | 149/852 [00:01<00:09, 76.66it/s]
1545
  19%|█▊ | 158/852 [00:02<00:08, 78.34it/s]
1546
  20%|█▉ | 167/852 [00:02<00:08, 79.21it/s]
1547
  21%|██ | 175/852 [00:02<00:08, 79.28it/s]
1548
  22%|██▏ | 184/852 [00:02<00:08, 79.85it/s]
1549
  23%|██▎ | 192/852 [00:02<00:08, 77.56it/s]
1550
  24%|██▎ | 201/852 [00:02<00:08, 79.28it/s]
1551
  25%|██▍ | 209/852 [00:02<00:08, 78.02it/s]
1552
  25%|██▌ | 217/852 [00:02<00:08, 78.36it/s]
1553
  26%|██▋ | 225/852 [00:02<00:07, 78.57it/s]
1554
  27%|██▋ | 234/852 [00:02<00:07, 79.89it/s]
1555
  28%|██▊ | 242/852 [00:03<00:07, 76.69it/s]
1556
  29%|██▉ | 250/852 [00:03<00:07, 77.50it/s]
1557
  30%|███ | 259/852 [00:03<00:07, 78.25it/s]
1558
  31%|███▏ | 267/852 [00:03<00:07, 77.79it/s]
1559
  32%|███▏ | 276/852 [00:03<00:07, 79.12it/s]
1560
  33%|███▎ | 285/852 [00:03<00:07, 79.97it/s]
1561
  34%|███▍ | 293/852 [00:03<00:07, 79.57it/s]
1562
  35%|███▌ | 302/852 [00:03<00:06, 80.12it/s]
1563
  37%|███▋ | 311/852 [00:03<00:06, 81.11it/s]
1564
  38%|███▊ | 320/852 [00:04<00:06, 77.46it/s]
1565
  38%|███▊ | 328/852 [00:04<00:06, 76.62it/s]
1566
  39%|███▉ | 336/852 [00:04<00:06, 76.07it/s]
1567
  40%|████ | 344/852 [00:04<00:06, 77.03it/s]
1568
  41%|████▏ | 352/852 [00:04<00:06, 77.09it/s]
1569
  42%|████▏ | 360/852 [00:04<00:06, 75.43it/s]
1570
  43%|████▎ | 368/852 [00:04<00:06, 76.71it/s]
1571
  44%|████▍ | 376/852 [00:04<00:06, 77.23it/s]
1572
  45%|████▌ | 384/852 [00:04<00:06, 76.84it/s]
1573
  46%|████▌ | 392/852 [00:05<00:05, 77.70it/s]
1574
  47%|████▋ | 400/852 [00:05<00:05, 77.92it/s]
1575
  48%|████▊ | 408/852 [00:05<00:05, 76.03it/s]
1576
  49%|████▉ | 417/852 [00:05<00:05, 77.02it/s]
1577
  50%|████▉ | 425/852 [00:05<00:05, 75.06it/s]
1578
  51%|█████ | 434/852 [00:05<00:05, 77.48it/s]
1579
  52%|█████▏ | 443/852 [00:05<00:05, 77.92it/s]
1580
  53%|█████▎ | 452/852 [00:05<00:05, 78.84it/s]
1581
  54%|█████▍ | 460/852 [00:05<00:04, 78.43it/s]
1582
  55%|█████▍ | 468/852 [00:05<00:04, 76.81it/s]
1583
  56%|█████▌ | 476/852 [00:06<00:05, 74.29it/s]
1584
  57%|█████▋ | 484/852 [00:06<00:04, 74.53it/s]
1585
  58%|█████▊ | 493/852 [00:06<00:04, 76.53it/s]
1586
  59%|█████▉ | 502/852 [00:06<00:04, 77.44it/s]
1587
  60%|█████▉ | 510/852 [00:06<00:04, 77.79it/s]
1588
  61%|██████ | 519/852 [00:06<00:04, 79.51it/s]
1589
  62%|██████▏ | 527/852 [00:06<00:04, 77.59it/s]
1590
  63%|██████▎ | 536/852 [00:06<00:03, 79.53it/s]
1591
  64%|██████▍ | 545/852 [00:06<00:03, 80.00it/s]
1592
  65%|██████▌ | 554/852 [00:07<00:03, 77.58it/s]
1593
  66%|██████▌ | 562/852 [00:07<00:03, 76.79it/s]
1594
  67%|██████▋ | 571/852 [00:07<00:03, 78.19it/s]
1595
  68%|██████▊ | 579/852 [00:07<00:03, 76.97it/s]
1596
  69%|██████▉ | 587/852 [00:07<00:03, 76.55it/s]
1597
  70%|██████▉ | 596/852 [00:07<00:03, 78.37it/s]
1598
  71%|███████ | 604/852 [00:07<00:03, 78.38it/s]
1599
  72%|███████▏ | 612/852 [00:07<00:03, 77.21it/s]
1600
  73%|███████▎ | 620/852 [00:07<00:02, 77.48it/s]
1601
  74%|███████▎ | 628/852 [00:08<00:02, 76.66it/s]
1602
  75%|███████▍ | 636/852 [00:08<00:02, 76.28it/s]
1603
  76%|███████▌ | 644/852 [00:08<00:02, 70.79it/s]
1604
  77%|███████▋ | 652/852 [00:08<00:02, 71.23it/s]
1605
  78%|███████▊ | 661/852 [00:08<00:02, 74.26it/s]
1606
  79%|███████▊ | 669/852 [00:08<00:02, 74.79it/s]
1607
  79%|███████▉ | 677/852 [00:08<00:02, 75.78it/s]
1608
  81%|████████ | 686/852 [00:08<00:02, 77.22it/s]
1609
  82%|████████▏ | 695/852 [00:08<00:01, 78.85it/s]
1610
  83%|████████▎ | 704/852 [00:09<00:01, 80.60it/s]
1611
  84%|████████▎ | 713/852 [00:09<00:01, 81.65it/s]
1612
  85%|████████▍ | 722/852 [00:09<00:01, 80.17it/s]
1613
  86%|████████▌ | 731/852 [00:09<00:01, 79.99it/s]
1614
  87%|████████▋ | 740/852 [00:09<00:01, 80.97it/s]
1615
  88%|████████▊ | 749/852 [00:09<00:01, 81.10it/s]
1616
  89%|████████▉ | 758/852 [00:09<00:01, 81.97it/s]
1617
  90%|█████████ | 767/852 [00:09<00:01, 80.55it/s]
1618
  91%|█████████ | 776/852 [00:09<00:00, 80.68it/s]
1619
  92%|█████████▏| 785/852 [00:10<00:00, 79.64it/s]
1620
  93%|█████████▎| 793/852 [00:10<00:00, 79.71it/s]
1621
  94%|█████████▍| 801/852 [00:10<00:00, 79.62it/s]
1622
  95%|█████████▍| 809/852 [00:10<00:00, 77.20it/s]
1623
  96%|█████████▌| 817/852 [00:10<00:00, 76.64it/s]
1624
  97%|█████████▋| 826/852 [00:10<00:00, 78.45it/s]
1625
  98%|█████████▊| 835/852 [00:10<00:00, 79.43it/s]
1626
  99%|█████████▉| 843/852 [00:10<00:00, 78.34it/s]
1627
+ ***** eval metrics *****
1628
+ epoch = 9.9882
1629
+ eval_accuracy = 0.9768
1630
+ eval_f1 = 0.8015
1631
+ eval_loss = 0.1294
1632
+ eval_precision = 0.7939
1633
+ eval_recall = 0.8093
1634
+ eval_runtime = 0:00:14.48
1635
+ eval_samples = 6810
1636
+ eval_samples_per_second = 470.164
1637
+ eval_steps_per_second = 58.822
1638
+ 08/30/2024 20:46:16 - INFO - __main__ - *** Predict ***
1639
+ [INFO|trainer.py:805] 2024-08-30 20:46:16,773 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1640
+ [INFO|trainer.py:3788] 2024-08-30 20:46:16,775 >>
1641
+ ***** Running Prediction *****
1642
+ [INFO|trainer.py:3790] 2024-08-30 20:46:16,775 >> Num examples = 14614
1643
+ [INFO|trainer.py:3793] 2024-08-30 20:46:16,775 >> Batch size = 8
1644
+
1645
  0%| | 0/1827 [00:00<?, ?it/s]
1646
  1%| | 10/1827 [00:00<00:20, 88.35it/s]
1647
  1%| | 19/1827 [00:00<00:23, 78.49it/s]
1648
  1%|▏ | 27/1827 [00:00<00:23, 77.09it/s]
1649
  2%|▏ | 35/1827 [00:00<00:23, 76.98it/s]
1650
  2%|▏ | 44/1827 [00:00<00:22, 78.90it/s]
1651
  3%|▎ | 52/1827 [00:00<00:22, 77.56it/s]
1652
  3%|▎ | 60/1827 [00:00<00:23, 75.36it/s]
1653
  4%|▎ | 68/1827 [00:00<00:23, 75.95it/s]
1654
  4%|▍ | 76/1827 [00:00<00:22, 76.90it/s]
1655
  5%|▍ | 84/1827 [00:01<00:22, 77.57it/s]
1656
  5%|▌ | 92/1827 [00:01<00:22, 78.03it/s]
1657
  5%|▌ | 100/1827 [00:01<00:22, 77.44it/s]
1658
  6%|▌ | 109/1827 [00:01<00:21, 79.23it/s]
1659
  6%|▋ | 117/1827 [00:01<00:21, 77.97it/s]
1660
  7%|▋ | 125/1827 [00:01<00:21, 77.49it/s]
1661
  7%|▋ | 134/1827 [00:01<00:21, 78.51it/s]
1662
  8%|▊ | 143/1827 [00:01<00:21, 80.08it/s]
1663
  8%|▊ | 152/1827 [00:01<00:20, 79.98it/s]
1664
  9%|▉ | 160/1827 [00:02<00:21, 76.81it/s]
1665
  9%|▉ | 169/1827 [00:02<00:21, 78.19it/s]
1666
  10%|▉ | 178/1827 [00:02<00:20, 80.05it/s]
1667
  10%|█ | 187/1827 [00:02<00:20, 81.05it/s]
1668
  11%|█ | 196/1827 [00:02<00:20, 80.06it/s]
1669
  11%|█ | 205/1827 [00:02<00:20, 79.21it/s]
1670
  12%|█▏ | 214/1827 [00:02<00:20, 78.84it/s]
1671
  12%|█▏ | 222/1827 [00:02<00:20, 78.64it/s]
1672
  13%|█▎ | 230/1827 [00:02<00:20, 77.74it/s]
1673
  13%|█▎ | 238/1827 [00:03<00:20, 76.28it/s]
1674
  14%|█▎ | 247/1827 [00:03<00:20, 78.47it/s]
1675
  14%|█▍ | 256/1827 [00:03<00:19, 79.66it/s]
1676
  14%|█▍ | 264/1827 [00:03<00:19, 79.04it/s]
1677
  15%|█▍ | 273/1827 [00:03<00:19, 80.83it/s]
1678
  15%|█▌ | 282/1827 [00:03<00:18, 81.79it/s]
1679
  16%|█▌ | 291/1827 [00:03<00:19, 80.01it/s]
1680
  16%|█▋ | 300/1827 [00:03<00:18, 80.85it/s]
1681
  17%|█▋ | 309/1827 [00:03<00:19, 79.16it/s]
1682
  17%|█▋ | 317/1827 [00:04<00:19, 79.20it/s]
1683
  18%|█▊ | 326/1827 [00:04<00:18, 80.90it/s]
1684
  18%|█▊ | 335/1827 [00:04<00:18, 80.27it/s]
1685
  19%|█▉ | 344/1827 [00:04<00:18, 81.58it/s]
1686
  19%|█▉ | 353/1827 [00:04<00:19, 76.68it/s]
1687
  20%|█▉ | 361/1827 [00:04<00:18, 77.41it/s]
1688
  20%|██ | 370/1827 [00:04<00:18, 78.63it/s]
1689
  21%|██ | 379/1827 [00:04<00:18, 79.94it/s]
1690
  21%|██ | 388/1827 [00:04<00:18, 79.31it/s]
1691
  22%|██▏ | 396/1827 [00:05<00:18, 78.99it/s]
1692
  22%|██▏ | 405/1827 [00:05<00:17, 80.86it/s]
1693
  23%|██▎ | 414/1827 [00:05<00:17, 79.00it/s]
1694
  23%|██▎ | 423/1827 [00:05<00:17, 80.11it/s]
1695
  24%|██▎ | 432/1827 [00:05<00:17, 79.18it/s]
1696
  24%|██▍ | 440/1827 [00:05<00:17, 79.33it/s]
1697
  25%|██▍ | 448/1827 [00:05<00:17, 79.41it/s]
1698
  25%|██▌ | 457/1827 [00:05<00:17, 80.52it/s]
1699
  26%|██▌ | 466/1827 [00:05<00:16, 80.24it/s]
1700
  26%|██▌ | 475/1827 [00:06<00:17, 78.66it/s]
1701
  26%|██▋ | 483/1827 [00:06<00:17, 78.64it/s]
1702
  27%|██▋ | 491/1827 [00:06<00:17, 77.35it/s]
1703
  27%|██▋ | 500/1827 [00:06<00:16, 78.45it/s]
1704
  28%|██▊ | 508/1827 [00:06<00:16, 78.32it/s]
1705
  28%|██▊ | 516/1827 [00:06<00:16, 78.75it/s]
1706
  29%|██▊ | 525/1827 [00:06<00:16, 78.79it/s]
1707
  29%|██▉ | 533/1827 [00:06<00:16, 78.81it/s]
1708
  30%|██▉ | 541/1827 [00:06<00:16, 79.14it/s]
1709
  30%|███ | 550/1827 [00:06<00:16, 79.35it/s]
1710
  31%|███ | 559/1827 [00:07<00:16, 79.12it/s]
1711
  31%|███ | 568/1827 [00:07<00:15, 81.14it/s]
1712
  32%|███▏ | 577/1827 [00:07<00:15, 82.20it/s]
1713
  32%|███▏ | 586/1827 [00:07<00:15, 81.32it/s]
1714
  33%|███▎ | 595/1827 [00:07<00:15, 81.45it/s]
1715
  33%|███▎ | 604/1827 [00:07<00:14, 81.95it/s]
1716
  34%|███▎ | 613/1827 [00:07<00:15, 80.89it/s]
1717
  34%|███▍ | 622/1827 [00:07<00:14, 80.81it/s]
1718
  35%|███▍ | 631/1827 [00:07<00:14, 82.11it/s]
1719
  35%|███▌ | 640/1827 [00:08<00:14, 82.74it/s]
1720
  36%|███▌ | 649/1827 [00:08<00:14, 80.60it/s]
1721
  36%|███▌ | 658/1827 [00:08<00:14, 78.56it/s]
1722
  37%|███▋ | 667/1827 [00:08<00:14, 80.49it/s]
1723
  37%|███▋ | 676/1827 [00:08<00:14, 80.58it/s]
1724
  37%|███▋ | 685/1827 [00:08<00:13, 81.73it/s]
1725
  38%|███▊ | 694/1827 [00:08<00:13, 82.81it/s]
1726
  38%|███▊ | 703/1827 [00:08<00:13, 81.28it/s]
1727
  39%|███▉ | 712/1827 [00:08<00:13, 82.12it/s]
1728
  39%|███▉ | 721/1827 [00:09<00:13, 83.16it/s]
1729
  40%|███▉ | 730/1827 [00:09<00:13, 83.47it/s]
1730
  40%|████ | 739/1827 [00:09<00:12, 83.79it/s]
1731
  41%|████ | 748/1827 [00:09<00:12, 84.99it/s]
1732
  41%|████▏ | 757/1827 [00:09<00:12, 85.15it/s]
1733
  42%|████▏ | 766/1827 [00:09<00:12, 85.06it/s]
1734
  42%|████▏ | 775/1827 [00:09<00:12, 85.29it/s]
1735
  43%|████▎ | 784/1827 [00:09<00:12, 81.63it/s]
1736
  43%|████▎ | 793/1827 [00:09<00:12, 82.24it/s]
1737
  44%|████▍ | 802/1827 [00:10<00:12, 81.57it/s]
1738
  44%|████▍ | 811/1827 [00:10<00:12, 82.68it/s]
1739
  45%|████▍ | 820/1827 [00:10<00:12, 82.99it/s]
1740
  45%|████▌ | 829/1827 [00:10<00:11, 83.82it/s]
1741
  46%|████▌ | 838/1827 [00:10<00:12, 82.25it/s]
1742
  46%|████▋ | 847/1827 [00:10<00:11, 83.20it/s]
1743
  47%|████▋ | 856/1827 [00:10<00:11, 84.21it/s]
1744
  47%|████▋ | 865/1827 [00:10<00:11, 84.27it/s]
1745
  48%|████▊ | 874/1827 [00:10<00:11, 80.96it/s]
1746
  48%|████▊ | 883/1827 [00:11<00:11, 81.30it/s]
1747
  49%|████▉ | 892/1827 [00:11<00:11, 80.78it/s]
1748
  49%|████▉ | 901/1827 [00:11<00:11, 82.19it/s]
1749
  50%|████▉ | 910/1827 [00:11<00:11, 79.40it/s]
1750
  50%|█████ | 919/1827 [00:11<00:11, 79.95it/s]
1751
  51%|█████ | 928/1827 [00:11<00:11, 80.89it/s]
1752
  51%|█████▏ | 937/1827 [00:11<00:11, 79.25it/s]
1753
  52%|█████▏ | 945/1827 [00:11<00:11, 79.00it/s]
1754
  52%|█████▏ | 953/1827 [00:11<00:11, 77.83it/s]
1755
  53%|█████▎ | 962/1827 [00:12<00:10, 78.70it/s]
1756
  53%|█████▎ | 971/1827 [00:12<00:10, 80.17it/s]
1757
  54%|█████▎ | 980/1827 [00:12<00:10, 79.85it/s]
1758
  54%|█████▍ | 989/1827 [00:12<00:10, 81.39it/s]
1759
  55%|█████▍ | 998/1827 [00:12<00:10, 82.20it/s]
1760
  55%|█████▌ | 1007/1827 [00:12<00:10, 81.27it/s]
1761
  56%|█████▌ | 1016/1827 [00:12<00:10, 80.65it/s]
1762
  56%|█████▌ | 1025/1827 [00:12<00:09, 81.53it/s]
1763
  57%|█████▋ | 1034/1827 [00:12<00:09, 79.74it/s]
1764
  57%|█████▋ | 1043/1827 [00:13<00:09, 80.67it/s]
1765
  58%|█████▊ | 1052/1827 [00:13<00:09, 81.68it/s]
1766
  58%|█████▊ | 1061/1827 [00:13<00:09, 82.32it/s]
1767
  59%|█████▊ | 1070/1827 [00:13<00:09, 83.48it/s]
1768
  59%|█████▉ | 1079/1827 [00:13<00:08, 83.94it/s]
1769
  60%|█████▉ | 1088/1827 [00:13<00:08, 83.86it/s]
1770
  60%|██████ | 1097/1827 [00:13<00:08, 83.97it/s]
1771
  61%|██████ | 1106/1827 [00:13<00:08, 83.02it/s]
1772
  61%|██████ | 1115/1827 [00:13<00:08, 81.44it/s]
1773
  62%|██████▏ | 1124/1827 [00:13<00:08, 81.63it/s]
1774
  62%|██████▏ | 1133/1827 [00:14<00:08, 81.21it/s]
1775
  63%|██████▎ | 1142/1827 [00:14<00:08, 82.28it/s]
1776
  63%|██████▎ | 1151/1827 [00:14<00:08, 83.08it/s]
1777
  63%|██████▎ | 1160/1827 [00:14<00:08, 81.13it/s]
1778
  64%|██████▍ | 1169/1827 [00:14<00:08, 81.26it/s]
1779
  64%|██████▍ | 1178/1827 [00:14<00:07, 82.06it/s]
1780
  65%|██████▍ | 1187/1827 [00:14<00:08, 77.70it/s]
1781
  65%|██████▌ | 1196/1827 [00:14<00:08, 78.65it/s]
1782
  66%|██████▌ | 1205/1827 [00:14<00:07, 80.19it/s]
1783
  66%|██████▋ | 1214/1827 [00:15<00:07, 78.78it/s]
1784
  67%|██████▋ | 1223/1827 [00:15<00:07, 79.88it/s]
1785
  67%|██████▋ | 1232/1827 [00:15<00:07, 81.80it/s]
1786
  68%|██████▊ | 1241/1827 [00:15<00:07, 82.09it/s]
1787
  68%|██████▊ | 1250/1827 [00:15<00:06, 82.44it/s]
1788
  69%|██████▉ | 1259/1827 [00:15<00:07, 80.44it/s]
1789
  69%|██████▉ | 1268/1827 [00:15<00:06, 80.68it/s]
1790
  70%|██████▉ | 1277/1827 [00:15<00:06, 80.44it/s]
1791
  70%|███████ | 1286/1827 [00:15<00:06, 81.54it/s]
1792
  71%|███████ | 1295/1827 [00:16<00:06, 82.61it/s]
1793
  71%|███████▏ | 1304/1827 [00:16<00:06, 83.55it/s]
1794
  72%|███████▏ | 1313/1827 [00:16<00:06, 83.36it/s]
1795
  72%|███████▏ | 1322/1827 [00:16<00:06, 83.66it/s]
1796
  73%|███████▎ | 1331/1827 [00:16<00:05, 84.22it/s]
1797
  73%|███████▎ | 1340/1827 [00:16<00:05, 84.53it/s]
1798
  74%|███████▍ | 1349/1827 [00:16<00:05, 81.75it/s]
1799
  74%|███████▍ | 1358/1827 [00:16<00:05, 80.15it/s]
1800
  75%|███████▍ | 1367/1827 [00:16<00:05, 80.04it/s]
1801
  75%|███████▌ | 1376/1827 [00:17<00:05, 80.07it/s]
1802
  76%|███████▌ | 1385/1827 [00:17<00:05, 81.10it/s]
1803
  76%|███████▋ | 1394/1827 [00:17<00:05, 80.87it/s]
1804
  77%|███████▋ | 1403/1827 [00:17<00:05, 82.04it/s]
1805
  77%|███████▋ | 1412/1827 [00:17<00:05, 82.83it/s]
1806
  78%|███████▊ | 1421/1827 [00:17<00:04, 82.88it/s]
1807
  78%|███████▊ | 1430/1827 [00:17<00:04, 81.23it/s]
1808
  79%|███████▉ | 1439/1827 [00:17<00:04, 79.21it/s]
1809
  79%|███████▉ | 1448/1827 [00:17<00:04, 80.64it/s]
1810
  80%|███████▉ | 1457/1827 [00:18<00:04, 82.29it/s]
1811
  80%|████████ | 1466/1827 [00:18<00:04, 83.72it/s]
1812
  81%|████████ | 1475/1827 [00:18<00:04, 80.74it/s]
1813
  81%|████████ | 1484/1827 [00:18<00:04, 81.51it/s]
1814
  82%|████████▏ | 1493/1827 [00:18<00:04, 79.63it/s]
1815
  82%|████████▏ | 1501/1827 [00:18<00:04, 79.30it/s]
1816
  83%|████████▎ | 1510/1827 [00:18<00:03, 79.91it/s]
1817
  83%|████████▎ | 1519/1827 [00:18<00:03, 80.55it/s]
1818
  84%|████████▎ | 1528/1827 [00:18<00:03, 79.14it/s]
1819
  84%|████████▍ | 1537/1827 [00:19<00:03, 79.70it/s]
1820
  85%|████████▍ | 1546/1827 [00:19<00:03, 80.07it/s]
1821
  85%|████████▌ | 1555/1827 [00:19<00:03, 80.71it/s]
1822
  86%|████████▌ | 1564/1827 [00:19<00:03, 80.13it/s]
1823
  86%|████████▌ | 1573/1827 [00:19<00:03, 80.19it/s]
1824
  87%|████████▋ | 1582/1827 [00:19<00:03, 81.00it/s]
1825
  87%|████████▋ | 1591/1827 [00:19<00:02, 80.84it/s]
1826
  88%|████████▊ | 1600/1827 [00:19<00:02, 80.45it/s]
1827
  88%|████████▊ | 1609/1827 [00:19<00:02, 80.95it/s]
1828
  89%|████████▊ | 1618/1827 [00:20<00:02, 77.39it/s]
1829
  89%|████████▉ | 1626/1827 [00:20<00:02, 75.05it/s]
1830
  89%|████████▉ | 1634/1827 [00:20<00:02, 76.14it/s]
1831
  90%|████████▉ | 1642/1827 [00:20<00:02, 77.16it/s]
1832
  90%|█████████ | 1651/1827 [00:20<00:02, 79.24it/s]
1833
  91%|█████████ | 1660/1827 [00:20<00:02, 79.60it/s]
1834
  91%|��████████▏| 1668/1827 [00:20<00:02, 78.92it/s]
1835
  92%|█████████▏| 1676/1827 [00:20<00:01, 79.10it/s]
1836
  92%|█████████▏| 1684/1827 [00:20<00:01, 78.48it/s]
1837
  93%|█████████▎| 1692/1827 [00:21<00:01, 74.95it/s]
1838
  93%|█████████▎| 1701/1827 [00:21<00:01, 76.99it/s]
1839
  94%|█████████▎| 1710/1827 [00:21<00:01, 78.97it/s]
1840
  94%|█████████▍| 1719/1827 [00:21<00:01, 77.08it/s]
1841
  95%|█████████▍| 1727/1827 [00:21<00:01, 76.46it/s]
1842
  95%|█████████▌| 1736/1827 [00:21<00:01, 77.82it/s]
1843
  96%|█████████▌| 1745/1827 [00:21<00:01, 79.51it/s]
1844
  96%|█████████▌| 1754/1827 [00:21<00:00, 80.76it/s]
1845
  96%|█████████▋| 1763/1827 [00:21<00:00, 80.67it/s]
1846
  97%|█████████▋| 1772/1827 [00:22<00:00, 81.04it/s]
1847
  97%|█████████▋| 1781/1827 [00:22<00:00, 82.22it/s]
1848
  98%|█████████▊| 1790/1827 [00:22<00:00, 82.53it/s]
1849
  98%|█████████▊| 1799/1827 [00:22<00:00, 78.84it/s]
1850
  99%|█████████▉| 1808/1827 [00:22<00:00, 80.29it/s]
1851
  99%|█████████▉| 1817/1827 [00:22<00:00, 81.17it/s]
1852
+ [INFO|trainer.py:3478] 2024-08-30 20:46:46,759 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1853
+ [INFO|configuration_utils.py:472] 2024-08-30 20:46:46,762 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1854
+ [INFO|modeling_utils.py:2690] 2024-08-30 20:46:48,100 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1855
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 20:46:48,101 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1856
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 20:46:48,101 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1857
+ ***** predict metrics *****
1858
+ predict_accuracy = 0.9766
1859
+ predict_f1 = 0.8012
1860
+ predict_loss = 0.1233
1861
+ predict_precision = 0.7872
1862
+ predict_recall = 0.8157
1863
+ predict_runtime = 0:00:29.36
1864
+ predict_samples_per_second = 497.683
1865
+ predict_steps_per_second = 62.219
1866
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 9.988249118683902,
3
- "total_flos": 1.2649810588547778e+16,
4
- "train_loss": 0.10639642311544979,
5
- "train_runtime": 1208.2019,
6
  "train_samples": 27229,
7
- "train_samples_per_second": 225.368,
8
- "train_steps_per_second": 3.518
9
  }
 
1
  {
2
  "epoch": 9.988249118683902,
3
+ "total_flos": 1.2649124434987926e+16,
4
+ "train_loss": 0.02590363489880281,
5
+ "train_runtime": 1203.9865,
6
  "train_samples": 27229,
7
+ "train_samples_per_second": 226.157,
8
+ "train_steps_per_second": 3.53
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.17333222008850296,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2127",
4
  "epoch": 9.988249118683902,
5
  "eval_steps": 500,
6
  "global_step": 4250,
@@ -10,188 +10,188 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9988249118683902,
13
- "eval_accuracy": 0.7849561289082346,
14
- "eval_f1": 0.1613083366573594,
15
- "eval_loss": 0.6610585451126099,
16
- "eval_precision": 0.08832008386476806,
17
- "eval_recall": 0.9292279411764706,
18
- "eval_runtime": 14.7703,
19
- "eval_samples_per_second": 461.06,
20
- "eval_steps_per_second": 57.683,
21
  "step": 425
22
  },
23
  {
24
  "epoch": 1.1750881316098707,
25
- "grad_norm": 1.4406206607818604,
26
  "learning_rate": 4.411764705882353e-05,
27
- "loss": 0.3349,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
- "eval_accuracy": 0.7551182940392986,
33
- "eval_f1": 0.14506880733944952,
34
- "eval_loss": 0.9204075932502747,
35
- "eval_precision": 0.07866915422885572,
36
- "eval_recall": 0.9301470588235294,
37
- "eval_runtime": 14.7453,
38
- "eval_samples_per_second": 461.844,
39
- "eval_steps_per_second": 57.781,
40
  "step": 851
41
  },
42
  {
43
  "epoch": 2.3501762632197414,
44
- "grad_norm": 2.988006591796875,
45
  "learning_rate": 3.8235294117647055e-05,
46
- "loss": 0.1788,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 2.99882491186839,
51
- "eval_accuracy": 0.7645035495077375,
52
- "eval_f1": 0.15487907225146869,
53
- "eval_loss": 0.9544711709022522,
54
- "eval_precision": 0.0844496214327315,
55
- "eval_recall": 0.9329044117647058,
56
- "eval_runtime": 14.8107,
57
- "eval_samples_per_second": 459.803,
58
- "eval_steps_per_second": 57.526,
59
  "step": 1276
60
  },
61
  {
62
  "epoch": 3.525264394829612,
63
- "grad_norm": 1.1645787954330444,
64
  "learning_rate": 3.235294117647059e-05,
65
- "loss": 0.1227,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
- "eval_accuracy": 0.7692064756203056,
71
- "eval_f1": 0.1618332674832082,
72
- "eval_loss": 1.0923608541488647,
73
- "eval_precision": 0.08852770813521224,
74
- "eval_recall": 0.9411764705882353,
75
- "eval_runtime": 14.8211,
76
- "eval_samples_per_second": 459.48,
77
- "eval_steps_per_second": 57.486,
78
  "step": 1702
79
  },
80
  {
81
  "epoch": 4.700352526439483,
82
- "grad_norm": 1.1214195489883423,
83
  "learning_rate": 2.647058823529412e-05,
84
- "loss": 0.0856,
85
  "step": 2000
86
  },
87
  {
88
  "epoch": 4.9988249118683905,
89
- "eval_accuracy": 0.7932840841995413,
90
- "eval_f1": 0.17333222008850296,
91
- "eval_loss": 1.0502684116363525,
92
- "eval_precision": 0.09532555790247038,
93
- "eval_recall": 0.9540441176470589,
94
- "eval_runtime": 14.797,
95
- "eval_samples_per_second": 460.229,
96
- "eval_steps_per_second": 57.579,
97
  "step": 2127
98
  },
99
  {
100
  "epoch": 5.875440658049354,
101
- "grad_norm": 1.1390776634216309,
102
  "learning_rate": 2.058823529411765e-05,
103
- "loss": 0.0597,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
- "eval_accuracy": 0.7787771018990209,
109
- "eval_f1": 0.16632958498503356,
110
- "eval_loss": 1.2641881704330444,
111
- "eval_precision": 0.0911913421449481,
112
- "eval_recall": 0.9448529411764706,
113
- "eval_runtime": 14.5753,
114
- "eval_samples_per_second": 467.228,
115
- "eval_steps_per_second": 58.455,
116
  "step": 2553
117
  },
118
  {
119
  "epoch": 6.9988249118683905,
120
- "eval_accuracy": 0.7828758564817994,
121
- "eval_f1": 0.16898640903880793,
122
- "eval_loss": 1.3261910676956177,
123
- "eval_precision": 0.09275570735214812,
124
- "eval_recall": 0.9485294117647058,
125
- "eval_runtime": 14.5617,
126
- "eval_samples_per_second": 467.665,
127
- "eval_steps_per_second": 58.51,
128
  "step": 2978
129
  },
130
  {
131
  "epoch": 7.050528789659224,
132
- "grad_norm": 0.5195357799530029,
133
  "learning_rate": 1.4705882352941177e-05,
134
- "loss": 0.0458,
135
  "step": 3000
136
  },
137
  {
138
  "epoch": 8.0,
139
- "eval_accuracy": 0.7848943386381424,
140
- "eval_f1": 0.1687846203975236,
141
- "eval_loss": 1.3697636127471924,
142
- "eval_precision": 0.09259921344297461,
143
- "eval_recall": 0.9522058823529411,
144
- "eval_runtime": 14.6608,
145
- "eval_samples_per_second": 464.504,
146
- "eval_steps_per_second": 58.114,
147
  "step": 3404
148
  },
149
  {
150
  "epoch": 8.225616921269095,
151
- "grad_norm": 0.8723571300506592,
152
  "learning_rate": 8.823529411764707e-06,
153
- "loss": 0.0343,
154
  "step": 3500
155
  },
156
  {
157
  "epoch": 8.99882491186839,
158
- "eval_accuracy": 0.782223625853049,
159
- "eval_f1": 0.165499476776946,
160
- "eval_loss": 1.4433350563049316,
161
- "eval_precision": 0.09069254521393913,
162
- "eval_recall": 0.9448529411764706,
163
- "eval_runtime": 15.0504,
164
- "eval_samples_per_second": 452.478,
165
- "eval_steps_per_second": 56.61,
166
  "step": 3829
167
  },
168
  {
169
  "epoch": 9.400705052878966,
170
- "grad_norm": 0.6075822710990906,
171
  "learning_rate": 2.9411764705882355e-06,
172
- "loss": 0.0292,
173
  "step": 4000
174
  },
175
  {
176
  "epoch": 9.988249118683902,
177
- "eval_accuracy": 0.7820794485561674,
178
- "eval_f1": 0.16674769081186194,
179
- "eval_loss": 1.4861844778060913,
180
- "eval_precision": 0.0914341567442687,
181
- "eval_recall": 0.9457720588235294,
182
- "eval_runtime": 15.3988,
183
- "eval_samples_per_second": 442.243,
184
- "eval_steps_per_second": 55.329,
185
  "step": 4250
186
  },
187
  {
188
  "epoch": 9.988249118683902,
189
  "step": 4250,
190
- "total_flos": 1.2649810588547778e+16,
191
- "train_loss": 0.10639642311544979,
192
- "train_runtime": 1208.2019,
193
- "train_samples_per_second": 225.368,
194
- "train_steps_per_second": 3.518
195
  }
196
  ],
197
  "logging_steps": 500,
@@ -211,7 +211,7 @@
211
  "attributes": {}
212
  }
213
  },
214
- "total_flos": 1.2649810588547778e+16,
215
  "train_batch_size": 32,
216
  "trial_name": null,
217
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8015293708724366,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3404",
4
  "epoch": 9.988249118683902,
5
  "eval_steps": 500,
6
  "global_step": 4250,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9988249118683902,
13
+ "eval_accuracy": 0.9733340656624604,
14
+ "eval_f1": 0.7536426810132258,
15
+ "eval_loss": 0.07383445650339127,
16
+ "eval_precision": 0.7233218588640276,
17
+ "eval_recall": 0.786616752456715,
18
+ "eval_runtime": 14.6851,
19
+ "eval_samples_per_second": 463.736,
20
+ "eval_steps_per_second": 58.018,
21
  "step": 425
22
  },
23
  {
24
  "epoch": 1.1750881316098707,
25
+ "grad_norm": 1.1428656578063965,
26
  "learning_rate": 4.411764705882353e-05,
27
+ "loss": 0.0996,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "eval_accuracy": 0.9743364411550661,
33
+ "eval_f1": 0.7698492462311558,
34
+ "eval_loss": 0.07865303754806519,
35
+ "eval_precision": 0.7363811151463363,
36
+ "eval_recall": 0.8065044454843239,
37
+ "eval_runtime": 14.4143,
38
+ "eval_samples_per_second": 472.448,
39
+ "eval_steps_per_second": 59.108,
40
  "step": 851
41
  },
42
  {
43
  "epoch": 2.3501762632197414,
44
+ "grad_norm": 1.476723074913025,
45
  "learning_rate": 3.8235294117647055e-05,
46
+ "loss": 0.0458,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 2.99882491186839,
51
+ "eval_accuracy": 0.9759155258351985,
52
+ "eval_f1": 0.7928563303378454,
53
+ "eval_loss": 0.07876282930374146,
54
+ "eval_precision": 0.7715297764002657,
55
+ "eval_recall": 0.8153954141319607,
56
+ "eval_runtime": 14.6794,
57
+ "eval_samples_per_second": 463.917,
58
+ "eval_steps_per_second": 58.041,
59
  "step": 1276
60
  },
61
  {
62
  "epoch": 3.525264394829612,
63
+ "grad_norm": 0.4094911515712738,
64
  "learning_rate": 3.235294117647059e-05,
65
+ "loss": 0.0279,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "eval_accuracy": 0.9757026926826589,
71
+ "eval_f1": 0.7929102344196683,
72
+ "eval_loss": 0.09221930056810379,
73
+ "eval_precision": 0.775441735629613,
74
+ "eval_recall": 0.8111839026672906,
75
+ "eval_runtime": 14.825,
76
+ "eval_samples_per_second": 459.361,
77
+ "eval_steps_per_second": 57.471,
78
  "step": 1702
79
  },
80
  {
81
  "epoch": 4.700352526439483,
82
+ "grad_norm": 0.49960169196128845,
83
  "learning_rate": 2.647058823529412e-05,
84
+ "loss": 0.0169,
85
  "step": 2000
86
  },
87
  {
88
  "epoch": 4.9988249118683905,
89
+ "eval_accuracy": 0.974439424938553,
90
+ "eval_f1": 0.7863421230561191,
91
+ "eval_loss": 0.09940312057733536,
92
+ "eval_precision": 0.7584782608695653,
93
+ "eval_recall": 0.816331305568554,
94
+ "eval_runtime": 14.3671,
95
+ "eval_samples_per_second": 473.999,
96
+ "eval_steps_per_second": 59.302,
97
  "step": 2127
98
  },
99
  {
100
  "epoch": 5.875440658049354,
101
+ "grad_norm": 0.6507154703140259,
102
  "learning_rate": 2.058823529411765e-05,
103
+ "loss": 0.0114,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "eval_accuracy": 0.9764579070948961,
109
+ "eval_f1": 0.7909059593523942,
110
+ "eval_loss": 0.10803968459367752,
111
+ "eval_precision": 0.7765501691093574,
112
+ "eval_recall": 0.8058025269068788,
113
+ "eval_runtime": 14.5335,
114
+ "eval_samples_per_second": 468.572,
115
+ "eval_steps_per_second": 58.623,
116
  "step": 2553
117
  },
118
  {
119
  "epoch": 6.9988249118683905,
120
+ "eval_accuracy": 0.975997912861988,
121
+ "eval_f1": 0.7943099690260411,
122
+ "eval_loss": 0.11656877398490906,
123
+ "eval_precision": 0.7792032410533424,
124
+ "eval_recall": 0.810014038371549,
125
+ "eval_runtime": 14.6842,
126
+ "eval_samples_per_second": 463.764,
127
+ "eval_steps_per_second": 58.022,
128
  "step": 2978
129
  },
130
  {
131
  "epoch": 7.050528789659224,
132
+ "grad_norm": 0.15811629593372345,
133
  "learning_rate": 1.4705882352941177e-05,
134
+ "loss": 0.0079,
135
  "step": 3000
136
  },
137
  {
138
  "epoch": 8.0,
139
+ "eval_accuracy": 0.9767668584453568,
140
+ "eval_f1": 0.8015293708724366,
141
+ "eval_loss": 0.1294233798980713,
142
+ "eval_precision": 0.7938948817994033,
143
+ "eval_recall": 0.8093121197941039,
144
+ "eval_runtime": 14.774,
145
+ "eval_samples_per_second": 460.946,
146
+ "eval_steps_per_second": 57.669,
147
  "step": 3404
148
  },
149
  {
150
  "epoch": 8.225616921269095,
151
+ "grad_norm": 0.11404519528150558,
152
  "learning_rate": 8.823529411764707e-06,
153
+ "loss": 0.0053,
154
  "step": 3500
155
  },
156
  {
157
  "epoch": 8.99882491186839,
158
+ "eval_accuracy": 0.9766158155629093,
159
+ "eval_f1": 0.7988929889298894,
160
+ "eval_loss": 0.13398884236812592,
161
+ "eval_precision": 0.787630741246021,
162
+ "eval_recall": 0.8104819840898456,
163
+ "eval_runtime": 14.4437,
164
+ "eval_samples_per_second": 471.487,
165
+ "eval_steps_per_second": 58.988,
166
  "step": 3829
167
  },
168
  {
169
  "epoch": 9.400705052878966,
170
+ "grad_norm": 0.5636719465255737,
171
  "learning_rate": 2.9411764705882355e-06,
172
+ "loss": 0.0038,
173
  "step": 4000
174
  },
175
  {
176
  "epoch": 9.988249118683902,
177
+ "eval_accuracy": 0.9766776058330014,
178
+ "eval_f1": 0.7988459319099828,
179
+ "eval_loss": 0.13674204051494598,
180
+ "eval_precision": 0.7882031427920747,
181
+ "eval_recall": 0.8097800655124006,
182
+ "eval_runtime": 14.8828,
183
+ "eval_samples_per_second": 457.575,
184
+ "eval_steps_per_second": 57.247,
185
  "step": 4250
186
  },
187
  {
188
  "epoch": 9.988249118683902,
189
  "step": 4250,
190
+ "total_flos": 1.2649124434987926e+16,
191
+ "train_loss": 0.02590363489880281,
192
+ "train_runtime": 1203.9865,
193
+ "train_samples_per_second": 226.157,
194
+ "train_steps_per_second": 3.53
195
  }
196
  ],
197
  "logging_steps": 500,
 
211
  "attributes": {}
212
  }
213
  },
214
+ "total_flos": 1.2649124434987926e+16,
215
  "train_batch_size": 32,
216
  "trial_name": null,
217
  "trial_params": null