Rodrigo1771 committed on
Commit
1538eb6
1 Parent(s): 8e76363

End of training

Browse files
README.md CHANGED
@@ -2,9 +2,10 @@
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - multi-train-drugtemist-dev-ner
8
  metrics:
9
  - precision
10
  - recall
@@ -17,24 +18,24 @@ model-index:
17
  name: Token Classification
18
  type: token-classification
19
  dataset:
20
- name: multi-train-drugtemist-dev-ner
21
- type: multi-train-drugtemist-dev-ner
22
  config: MultiTrainDrugTEMISTDevNER
23
  split: validation
24
  args: MultiTrainDrugTEMISTDevNER
25
  metrics:
26
  - name: Precision
27
  type: precision
28
- value: 0.09270693512304251
29
  - name: Recall
30
  type: recall
31
- value: 0.9522058823529411
32
  - name: F1
33
  type: f1
34
- value: 0.16896354888689555
35
  - name: Accuracy
36
  type: accuracy
37
- value: 0.7845534874460183
38
  ---
39
 
40
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -42,13 +43,13 @@ should probably proofread and complete it, then remove this comment. -->
42
 
43
  # output
44
 
45
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the multi-train-drugtemist-dev-ner dataset.
46
  It achieves the following results on the evaluation set:
47
- - Loss: 1.7861
48
- - Precision: 0.0927
49
- - Recall: 0.9522
50
- - F1: 0.1690
51
- - Accuracy: 0.7846
52
 
53
  ## Model description
54
 
 
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
5
+ - token-classification
6
  - generated_from_trainer
7
  datasets:
8
+ - Rodrigo1771/multi-train-drugtemist-dev-ner
9
  metrics:
10
  - precision
11
  - recall
 
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
+ name: Rodrigo1771/multi-train-drugtemist-dev-ner
22
+ type: Rodrigo1771/multi-train-drugtemist-dev-ner
23
  config: MultiTrainDrugTEMISTDevNER
24
  split: validation
25
  args: MultiTrainDrugTEMISTDevNER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
+ value: 0.09691960931630353
30
  - name: Recall
31
  type: recall
32
+ value: 0.9485294117647058
33
  - name: F1
34
  type: f1
35
+ value: 0.17586912065439672
36
  - name: Accuracy
37
  type: accuracy
38
+ value: 0.8099635429897495
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
43
 
44
  # output
45
 
46
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/multi-train-drugtemist-dev-ner dataset.
47
  It achieves the following results on the evaluation set:
48
+ - Loss: 0.6631
49
+ - Precision: 0.0969
50
+ - Recall: 0.9485
51
+ - F1: 0.1759
52
+ - Accuracy: 0.8100
53
 
54
  ## Model description
55
 
all_results.json CHANGED
@@ -1,19 +1,26 @@
1
  {
2
- "eval_accuracy": 0.002835545241707918,
3
- "eval_f1": 0.0006942779922141681,
4
- "eval_loss": 2.4030845165252686,
5
- "eval_precision": 0.00035028898841544273,
6
- "eval_recall": 0.03860294117647059,
7
- "eval_runtime": 16.7962,
 
8
  "eval_samples": 6807,
9
- "eval_samples_per_second": 405.27,
10
- "eval_steps_per_second": 50.666,
11
- "predict_accuracy": 0.002835545241707918,
12
- "predict_f1": 0.0006942779922141681,
13
- "predict_loss": 2.4030845165252686,
14
- "predict_precision": 0.00035028898841544273,
15
- "predict_recall": 0.03860294117647059,
16
- "predict_runtime": 16.0715,
17
- "predict_samples_per_second": 423.545,
18
- "predict_steps_per_second": 52.951
 
 
 
 
 
 
19
  }
 
1
  {
2
+ "epoch": 9.997061416397296,
3
+ "eval_accuracy": 0.8099635429897495,
4
+ "eval_f1": 0.17586912065439672,
5
+ "eval_loss": 0.6631014347076416,
6
+ "eval_precision": 0.09691960931630353,
7
+ "eval_recall": 0.9485294117647058,
8
+ "eval_runtime": 15.8157,
9
  "eval_samples": 6807,
10
+ "eval_samples_per_second": 430.395,
11
+ "eval_steps_per_second": 53.807,
12
+ "predict_accuracy": 0.8099635429897495,
13
+ "predict_f1": 0.17586912065439672,
14
+ "predict_loss": 0.6631014347076416,
15
+ "predict_precision": 0.09691960931630353,
16
+ "predict_recall": 0.9485294117647058,
17
+ "predict_runtime": 15.8984,
18
+ "predict_samples_per_second": 428.156,
19
+ "predict_steps_per_second": 53.527,
20
+ "total_flos": 6700722040732752.0,
21
+ "train_loss": 0.08913132946046923,
22
+ "train_runtime": 3337.712,
23
+ "train_samples": 27224,
24
+ "train_samples_per_second": 81.565,
25
+ "train_steps_per_second": 5.096
26
  }
eval_results.json CHANGED
@@ -1,11 +1,12 @@
1
  {
2
- "eval_accuracy": 0.002835545241707918,
3
- "eval_f1": 0.0006942779922141681,
4
- "eval_loss": 2.4030845165252686,
5
- "eval_precision": 0.00035028898841544273,
6
- "eval_recall": 0.03860294117647059,
7
- "eval_runtime": 16.7962,
 
8
  "eval_samples": 6807,
9
- "eval_samples_per_second": 405.27,
10
- "eval_steps_per_second": 50.666
11
  }
 
1
  {
2
+ "epoch": 9.997061416397296,
3
+ "eval_accuracy": 0.8099635429897495,
4
+ "eval_f1": 0.17586912065439672,
5
+ "eval_loss": 0.6631014347076416,
6
+ "eval_precision": 0.09691960931630353,
7
+ "eval_recall": 0.9485294117647058,
8
+ "eval_runtime": 15.8157,
9
  "eval_samples": 6807,
10
+ "eval_samples_per_second": 430.395,
11
+ "eval_steps_per_second": 53.807
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.002835545241707918,
3
- "predict_f1": 0.0006942779922141681,
4
- "predict_loss": 2.4030845165252686,
5
- "predict_precision": 0.00035028898841544273,
6
- "predict_recall": 0.03860294117647059,
7
- "predict_runtime": 16.0715,
8
- "predict_samples_per_second": 423.545,
9
- "predict_steps_per_second": 52.951
10
  }
 
1
  {
2
+ "predict_accuracy": 0.8099635429897495,
3
+ "predict_f1": 0.17586912065439672,
4
+ "predict_loss": 0.6631014347076416,
5
+ "predict_precision": 0.09691960931630353,
6
+ "predict_recall": 0.9485294117647058,
7
+ "predict_runtime": 15.8984,
8
+ "predict_samples_per_second": 428.156,
9
+ "predict_steps_per_second": 53.527
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1715612339.c331905616cf.3060.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd631ac54bf6215fcba3db4eb9803c4beb0d2d937cbe6eea52ab0ff263ca8fae
3
+ size 569
train.log CHANGED
@@ -1613,3 +1613,53 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1613
  [INFO|modeling_utils.py:2590] 2024-05-13 14:58:40,302 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1614
  [INFO|tokenization_utils_base.py:2488] 2024-05-13 14:58:40,303 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1615
  [INFO|tokenization_utils_base.py:2497] 2024-05-13 14:58:40,303 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1616
  0%| | 0/851 [00:00<?, ?it/s]
1617
  1%| | 10/851 [00:00<00:09, 92.39it/s]
1618
  2%|▏ | 20/851 [00:00<00:10, 78.68it/s]
1619
  3%|▎ | 28/851 [00:00<00:10, 75.69it/s]
1620
  4%|▍ | 36/851 [00:00<00:10, 74.36it/s]
1621
  5%|▌ | 44/851 [00:00<00:10, 75.32it/s]
1622
  6%|▌ | 52/851 [00:00<00:10, 75.93it/s]
1623
  7%|▋ | 60/851 [00:00<00:10, 76.39it/s]
1624
  8%|▊ | 68/851 [00:00<00:11, 70.86it/s]
1625
  9%|▉ | 76/851 [00:01<00:11, 70.00it/s]
1626
  10%|▉ | 84/851 [00:01<00:10, 70.49it/s]
1627
  11%|█ | 92/851 [00:01<00:10, 70.55it/s]
1628
  12%|█▏ | 100/851 [00:01<00:10, 70.25it/s]
1629
  13%|█▎ | 108/851 [00:01<00:10, 69.63it/s]
1630
  14%|█▎ | 115/851 [00:01<00:10, 69.25it/s]
1631
  15%|█▍ | 124/851 [00:01<00:09, 73.89it/s]
1632
  16%|█▌ | 132/851 [00:01<00:10, 66.17it/s]
1633
  16%|█▋ | 139/851 [00:01<00:10, 66.18it/s]
1634
  17%|█▋ | 147/851 [00:02<00:10, 67.87it/s]
1635
  18%|█▊ | 154/851 [00:02<00:10, 67.52it/s]
1636
  19%|█▉ | 162/851 [00:02<00:09, 69.68it/s]
1637
  20%|█▉ | 170/851 [00:02<00:09, 71.19it/s]
1638
  21%|██ | 178/851 [00:02<00:09, 73.27it/s]
1639
  22%|██▏ | 186/851 [00:02<00:09, 73.02it/s]
1640
  23%|██▎ | 194/851 [00:02<00:08, 74.19it/s]
1641
  24%|██▍ | 203/851 [00:02<00:08, 76.36it/s]
1642
  25%|██▍ | 211/851 [00:02<00:08, 71.75it/s]
1643
  26%|██▌ | 219/851 [00:03<00:09, 69.67it/s]
1644
  27%|██▋ | 227/851 [00:03<00:08, 71.38it/s]
1645
  28%|██▊ | 235/851 [00:03<00:08, 72.05it/s]
1646
  29%|██▊ | 243/851 [00:03<00:09, 67.00it/s]
1647
  29%|██▉ | 251/851 [00:03<00:08, 69.80it/s]
1648
  30%|███ | 259/851 [00:03<00:08, 72.22it/s]
1649
  31%|███▏ | 267/851 [00:03<00:08, 72.74it/s]
1650
  32%|███▏ | 275/851 [00:03<00:07, 73.62it/s]
1651
  33%|███▎ | 283/851 [00:03<00:07, 74.61it/s]
1652
  34%|███▍ | 291/851 [00:04<00:07, 72.54it/s]
1653
  35%|███▌ | 299/851 [00:04<00:07, 74.26it/s]
1654
  36%|███▌ | 307/851 [00:04<00:07, 75.74it/s]
1655
  37%|███▋ | 315/851 [00:04<00:07, 70.27it/s]
1656
  38%|███▊ | 323/851 [00:04<00:07, 72.85it/s]
1657
  39%|███▉ | 331/851 [00:04<00:07, 72.36it/s]
1658
  40%|███▉ | 339/851 [00:04<00:07, 71.89it/s]
1659
  41%|████ | 347/851 [00:04<00:06, 72.58it/s]
1660
  42%|████▏ | 355/851 [00:04<00:07, 69.70it/s]
1661
  43%|████▎ | 363/851 [00:05<00:06, 69.83it/s]
1662
  44%|████▎ | 371/851 [00:05<00:06, 69.06it/s]
1663
  44%|████▍ | 378/851 [00:05<00:06, 69.30it/s]
1664
  45%|████▌ | 385/851 [00:05<00:06, 68.06it/s]
1665
  46%|████▌ | 393/851 [00:05<00:06, 69.52it/s]
1666
  47%|████▋ | 401/851 [00:05<00:06, 69.90it/s]
1667
  48%|████▊ | 408/851 [00:05<00:06, 64.81it/s]
1668
  49%|████▉ | 416/851 [00:05<00:06, 67.34it/s]
1669
  50%|████▉ | 424/851 [00:05<00:06, 69.53it/s]
1670
  51%|█████ | 431/851 [00:06<00:06, 68.95it/s]
1671
  52%|█████▏ | 439/851 [00:06<00:05, 71.11it/s]
1672
  53%|█████▎ | 447/851 [00:06<00:05, 70.55it/s]
1673
  53%|█████▎ | 455/851 [00:06<00:05, 71.50it/s]
1674
  54%|█████▍ | 463/851 [00:06<00:05, 70.54it/s]
1675
  55%|█████▌ | 471/851 [00:06<00:06, 61.54it/s]
1676
  56%|█████▌ | 478/851 [00:06<00:05, 63.35it/s]
1677
  57%|█████▋ | 485/851 [00:06<00:05, 63.12it/s]
1678
  58%|█████▊ | 493/851 [00:06<00:05, 66.52it/s]
1679
  59%|█████▉ | 502/851 [00:07<00:04, 70.10it/s]
1680
  60%|█████▉ | 510/851 [00:07<00:04, 69.87it/s]
1681
  61%|██████ | 518/851 [00:07<00:04, 71.30it/s]
1682
  62%|██████▏ | 526/851 [00:07<00:04, 66.37it/s]
1683
  63%|██████▎ | 534/851 [00:07<00:04, 68.15it/s]
1684
  64%|██████▎ | 542/851 [00:07<00:04, 69.51it/s]
1685
  65%|██████▍ | 550/851 [00:07<00:04, 67.52it/s]
1686
  66%|██████▌ | 558/851 [00:07<00:04, 69.41it/s]
1687
  67%|██████▋ | 566/851 [00:08<00:03, 72.24it/s]
1688
  67%|██████▋ | 574/851 [00:08<00:03, 72.87it/s]
1689
  68%|██████▊ | 582/851 [00:08<00:03, 69.95it/s]
1690
  69%|██████▉ | 590/851 [00:08<00:03, 67.24it/s]
1691
  70%|███████ | 597/851 [00:08<00:03, 67.77it/s]
1692
  71%|███████ | 605/851 [00:08<00:03, 67.61it/s]
1693
  72%|███████▏ | 612/851 [00:08<00:03, 66.37it/s]
1694
  73%|███████▎ | 619/851 [00:08<00:03, 64.78it/s]
1695
  74%|███████▎ | 626/851 [00:08<00:03, 64.41it/s]
1696
  74%|███████▍ | 633/851 [00:09<00:03, 64.56it/s]
1697
  75%|███████▌ | 640/851 [00:09<00:03, 64.37it/s]
1698
  76%|███████▌ | 647/851 [00:09<00:03, 62.56it/s]
1699
  77%|███████▋ | 655/851 [00:09<00:02, 65.73it/s]
1700
  78%|███████▊ | 662/851 [00:09<00:02, 66.42it/s]
1701
  79%|███████▊ | 670/851 [00:09<00:02, 67.98it/s]
1702
  80%|███████▉ | 678/851 [00:09<00:02, 69.04it/s]
1703
  81%|████████ | 686/851 [00:09<00:02, 69.89it/s]
1704
  82%|████████▏ | 694/851 [00:09<00:02, 71.78it/s]
1705
  82%|████████▏ | 702/851 [00:10<00:02, 72.47it/s]
1706
  83%|████████▎ | 710/851 [00:10<00:01, 73.89it/s]
1707
  84%|████████▍ | 718/851 [00:10<00:01, 70.91it/s]
1708
  85%|████████▌ | 726/851 [00:10<00:01, 70.61it/s]
1709
  86%|████████▋ | 734/851 [00:10<00:01, 72.17it/s]
1710
  87%|████████▋ | 742/851 [00:10<00:01, 73.05it/s]
1711
  88%|████████▊ | 750/851 [00:10<00:01, 72.59it/s]
1712
  89%|████████▉ | 758/851 [00:10<00:01, 73.09it/s]
1713
  90%|█████████ | 766/851 [00:10<00:01, 69.09it/s]
1714
  91%|█████████ | 774/851 [00:11<00:01, 68.91it/s]
1715
  92%|█████████▏| 781/851 [00:11<00:01, 66.57it/s]
1716
  93%|█████████▎| 788/851 [00:11<00:00, 67.34it/s]
1717
  94%|█████████▎| 796/851 [00:11<00:00, 69.12it/s]
1718
  94%|█████████▍| 804/851 [00:11<00:00, 71.53it/s]
1719
  95%|█████████▌| 812/851 [00:11<00:00, 69.03it/s]
1720
  96%|█████████▋| 820/851 [00:11<00:00, 69.99it/s]
1721
  97%|█████████▋| 828/851 [00:11<00:00, 71.40it/s]
1722
  98%|█████████▊| 836/851 [00:11<00:00, 71.06it/s]
1723
  99%|█████████▉| 844/851 [00:12<00:00, 67.57it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1724
  0%| | 0/851 [00:00<?, ?it/s]
1725
  1%| | 10/851 [00:00<00:08, 94.43it/s]
1726
  2%|▏ | 20/851 [00:00<00:10, 79.64it/s]
1727
  3%|▎ | 29/851 [00:00<00:10, 75.61it/s]
1728
  4%|▍ | 37/851 [00:00<00:11, 73.82it/s]
1729
  5%|▌ | 45/851 [00:00<00:10, 73.71it/s]
1730
  6%|▌ | 53/851 [00:00<00:10, 74.31it/s]
1731
  7%|▋ | 62/851 [00:00<00:10, 75.34it/s]
1732
  8%|▊ | 70/851 [00:00<00:11, 70.02it/s]
1733
  9%|▉ | 78/851 [00:01<00:10, 71.64it/s]
1734
  10%|█ | 86/851 [00:01<00:10, 71.26it/s]
1735
  11%|█ | 94/851 [00:01<00:10, 69.42it/s]
1736
  12%|█▏ | 102/851 [00:01<00:10, 70.77it/s]
1737
  13%|█▎ | 110/851 [00:01<00:10, 69.03it/s]
1738
  14%|█▍ | 118/851 [00:01<00:10, 70.01it/s]
1739
  15%|█▍ | 126/851 [00:01<00:10, 67.17it/s]
1740
  16%|█▌ | 133/851 [00:01<00:10, 67.65it/s]
1741
  16%|█▋ | 140/851 [00:01<00:10, 67.39it/s]
1742
  17%|█▋ | 148/851 [00:02<00:10, 66.64it/s]
1743
  18%|█▊ | 156/851 [00:02<00:10, 68.76it/s]
1744
  19%|█▉ | 164/851 [00:02<00:09, 69.74it/s]
1745
  20%|██ | 172/851 [00:02<00:09, 71.66it/s]
1746
  21%|██ | 180/851 [00:02<00:09, 72.19it/s]
1747
  22%|██▏ | 188/851 [00:02<00:09, 72.67it/s]
1748
  23%|██▎ | 196/851 [00:02<00:08, 73.59it/s]
1749
  24%|██▍ | 204/851 [00:02<00:08, 75.18it/s]
1750
  25%|██▍ | 212/851 [00:02<00:09, 70.71it/s]
1751
  26%|██▌ | 220/851 [00:03<00:09, 68.53it/s]
1752
  27%|██▋ | 228/851 [00:03<00:08, 69.85it/s]
1753
  28%|██▊ | 236/851 [00:03<00:08, 69.98it/s]
1754
  29%|██▊ | 244/851 [00:03<00:09, 66.03it/s]
1755
  30%|██▉ | 252/851 [00:03<00:08, 69.13it/s]
1756
  31%|███ | 261/851 [00:03<00:08, 72.34it/s]
1757
  32%|███▏ | 269/851 [00:03<00:08, 71.00it/s]
1758
  33%|███▎ | 277/851 [00:03<00:07, 73.30it/s]
1759
  33%|███▎ | 285/851 [00:03<00:07, 74.56it/s]
1760
  34%|███▍ | 293/851 [00:04<00:07, 72.43it/s]
1761
  35%|███▌ | 301/851 [00:04<00:07, 73.31it/s]
1762
  36%|███▋ | 310/851 [00:04<00:07, 75.84it/s]
1763
  37%|███▋ | 318/851 [00:04<00:07, 71.54it/s]
1764
  38%|███▊ | 326/851 [00:04<00:07, 71.08it/s]
1765
  39%|███▉ | 334/851 [00:04<00:07, 71.61it/s]
1766
  40%|████ | 342/851 [00:04<00:07, 72.39it/s]
1767
  41%|████ | 350/851 [00:04<00:06, 71.99it/s]
1768
  42%|████▏ | 358/851 [00:05<00:07, 67.28it/s]
1769
  43%|████▎ | 366/851 [00:05<00:07, 67.95it/s]
1770
  44%|████▍ | 373/851 [00:05<00:06, 68.31it/s]
1771
  45%|████▍ | 380/851 [00:05<00:07, 66.41it/s]
1772
  46%|████▌ | 388/851 [00:05<00:06, 69.17it/s]
1773
  46%|████▋ | 395/851 [00:05<00:06, 69.34it/s]
1774
  47%|████▋ | 402/851 [00:05<00:06, 69.06it/s]
1775
  48%|████▊ | 409/851 [00:05<00:06, 65.01it/s]
1776
  49%|████▉ | 417/851 [00:05<00:06, 67.26it/s]
1777
  50%|████▉ | 425/851 [00:06<00:06, 66.34it/s]
1778
  51%|█████ | 433/851 [00:06<00:06, 68.38it/s]
1779
  52%|█████▏ | 441/851 [00:06<00:05, 70.44it/s]
1780
  53%|█████▎ | 449/851 [00:06<00:05, 70.15it/s]
1781
  54%|█████▎ | 457/851 [00:06<00:05, 71.36it/s]
1782
  55%|█████▍ | 465/851 [00:06<00:05, 68.14it/s]
1783
  55%|█████▌ | 472/851 [00:06<00:06, 61.77it/s]
1784
  56%|█████▋ | 479/851 [00:06<00:05, 63.68it/s]
1785
  57%|█████▋ | 486/851 [00:06<00:05, 62.95it/s]
1786
  58%|█████▊ | 494/851 [00:07<00:05, 66.26it/s]
1787
  59%|█████▉ | 502/851 [00:07<00:05, 69.52it/s]
1788
  60%|█████▉ | 510/851 [00:07<00:04, 69.09it/s]
1789
  61%|██████ | 518/851 [00:07<00:04, 70.28it/s]
1790
  62%|██████▏ | 526/851 [00:07<00:04, 65.43it/s]
1791
  63%|██████▎ | 534/851 [00:07<00:04, 67.57it/s]
1792
  64%|██████▎ | 542/851 [00:07<00:04, 69.07it/s]
1793
  65%|██████▍ | 549/851 [00:07<00:04, 66.40it/s]
1794
  65%|██████▌ | 557/851 [00:07<00:04, 68.83it/s]
1795
  67%|██████▋ | 566/851 [00:08<00:03, 72.25it/s]
1796
  67%|██████▋ | 574/851 [00:08<00:03, 71.57it/s]
1797
  68%|██████▊ | 582/851 [00:08<00:03, 69.26it/s]
1798
  69%|██████▉ | 589/851 [00:08<00:03, 66.06it/s]
1799
  70%|███████ | 596/851 [00:08<00:03, 67.01it/s]
1800
  71%|███████ | 604/851 [00:08<00:03, 68.21it/s]
1801
  72%|███████▏ | 611/851 [00:08<00:03, 67.43it/s]
1802
  73%|███████▎ | 618/851 [00:08<00:03, 63.33it/s]
1803
  74%|███████▎ | 626/851 [00:09<00:03, 64.04it/s]
1804
  74%|███████▍ | 633/851 [00:09<00:03, 63.85it/s]
1805
  75%|███████▌ | 640/851 [00:09<00:03, 64.00it/s]
1806
  76%|███████▌ | 647/851 [00:09<00:03, 62.36it/s]
1807
  77%|███████▋ | 655/851 [00:09<00:02, 65.58it/s]
1808
  78%|███████▊ | 662/851 [00:09<00:02, 66.21it/s]
1809
  79%|███████▊ | 669/851 [00:09<00:02, 67.09it/s]
1810
  79%|███████▉ | 676/851 [00:09<00:02, 66.93it/s]
1811
  80%|████████ | 683/851 [00:09<00:02, 67.45it/s]
1812
  81%|████████ | 691/851 [00:09<00:02, 70.83it/s]
1813
  82%|████████▏ | 699/851 [00:10<00:02, 70.91it/s]
1814
  83%|████████▎ | 708/851 [00:10<00:01, 74.14it/s]
1815
  84%|████████▍ | 716/851 [00:10<00:01, 71.95it/s]
1816
  85%|████████▌ | 724/851 [00:10<00:01, 72.30it/s]
1817
  86%|████████▌ | 732/851 [00:10<00:01, 73.78it/s]
1818
  87%|████████▋ | 740/851 [00:10<00:01, 73.29it/s]
1819
  88%|████████▊ | 748/851 [00:10<00:01, 72.54it/s]
1820
  89%|████████▉ | 756/851 [00:10<00:01, 72.39it/s]
1821
  90%|████████▉ | 764/851 [00:10<00:01, 73.41it/s]
1822
  91%|█████████ | 772/851 [00:11<00:01, 68.50it/s]
1823
  92%|█████████▏| 779/851 [00:11<00:01, 66.70it/s]
1824
  92%|█████████▏| 786/851 [00:11<00:00, 67.37it/s]
1825
  93%|█████████▎| 794/851 [00:11<00:00, 68.93it/s]
1826
  94%|█████████▍| 802/851 [00:11<00:00, 71.12it/s]
1827
  95%|█████████▌| 810/851 [00:11<00:00, 68.26it/s]
1828
  96%|█████████▌| 818/851 [00:11<00:00, 69.10it/s]
1829
  97%|█████████▋| 826/851 [00:11<00:00, 70.24it/s]
1830
  98%|█████████▊| 834/851 [00:11<00:00, 69.55it/s]
1831
  99%|█████████▉| 841/851 [00:12<00:00, 69.21it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1613
  [INFO|modeling_utils.py:2590] 2024-05-13 14:58:40,302 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1614
  [INFO|tokenization_utils_base.py:2488] 2024-05-13 14:58:40,303 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1615
  [INFO|tokenization_utils_base.py:2497] 2024-05-13 14:58:40,303 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1616
+ {'eval_loss': 1.7860842943191528, 'eval_precision': 0.09270693512304251, 'eval_recall': 0.9522058823529411, 'eval_f1': 0.16896354888689555, 'eval_accuracy': 0.7845534874460183, 'eval_runtime': 15.9319, 'eval_samples_per_second': 427.256, 'eval_steps_per_second': 53.415, 'epoch': 10.0}
1617
+ {'train_runtime': 3337.712, 'train_samples_per_second': 81.565, 'train_steps_per_second': 5.096, 'train_loss': 0.08913132946046923, 'epoch': 10.0}
1618
+ ***** train metrics *****
1619
+ epoch = 9.9971
1620
+ total_flos = 6240533GF
1621
+ train_loss = 0.0891
1622
+ train_runtime = 0:55:37.71
1623
+ train_samples = 27224
1624
+ train_samples_per_second = 81.565
1625
+ train_steps_per_second = 5.096
1626
+ 05/13/2024 14:58:43 - INFO - __main__ - *** Evaluate ***
1627
+ [INFO|trainer.py:786] 2024-05-13 14:58:43,309 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1628
+ [INFO|trainer.py:3614] 2024-05-13 14:58:43,312 >> ***** Running Evaluation *****
1629
+ [INFO|trainer.py:3616] 2024-05-13 14:58:43,312 >> Num examples = 6807
1630
+ [INFO|trainer.py:3619] 2024-05-13 14:58:43,312 >> Batch size = 8
1631
+
1632
  0%| | 0/851 [00:00<?, ?it/s]
1633
  1%| | 10/851 [00:00<00:09, 92.39it/s]
1634
  2%|▏ | 20/851 [00:00<00:10, 78.68it/s]
1635
  3%|▎ | 28/851 [00:00<00:10, 75.69it/s]
1636
  4%|▍ | 36/851 [00:00<00:10, 74.36it/s]
1637
  5%|▌ | 44/851 [00:00<00:10, 75.32it/s]
1638
  6%|▌ | 52/851 [00:00<00:10, 75.93it/s]
1639
  7%|▋ | 60/851 [00:00<00:10, 76.39it/s]
1640
  8%|▊ | 68/851 [00:00<00:11, 70.86it/s]
1641
  9%|▉ | 76/851 [00:01<00:11, 70.00it/s]
1642
  10%|▉ | 84/851 [00:01<00:10, 70.49it/s]
1643
  11%|█ | 92/851 [00:01<00:10, 70.55it/s]
1644
  12%|█▏ | 100/851 [00:01<00:10, 70.25it/s]
1645
  13%|█▎ | 108/851 [00:01<00:10, 69.63it/s]
1646
  14%|█▎ | 115/851 [00:01<00:10, 69.25it/s]
1647
  15%|█▍ | 124/851 [00:01<00:09, 73.89it/s]
1648
  16%|█▌ | 132/851 [00:01<00:10, 66.17it/s]
1649
  16%|█▋ | 139/851 [00:01<00:10, 66.18it/s]
1650
  17%|█▋ | 147/851 [00:02<00:10, 67.87it/s]
1651
  18%|█▊ | 154/851 [00:02<00:10, 67.52it/s]
1652
  19%|█▉ | 162/851 [00:02<00:09, 69.68it/s]
1653
  20%|█▉ | 170/851 [00:02<00:09, 71.19it/s]
1654
  21%|██ | 178/851 [00:02<00:09, 73.27it/s]
1655
  22%|██▏ | 186/851 [00:02<00:09, 73.02it/s]
1656
  23%|██▎ | 194/851 [00:02<00:08, 74.19it/s]
1657
  24%|██▍ | 203/851 [00:02<00:08, 76.36it/s]
1658
  25%|██▍ | 211/851 [00:02<00:08, 71.75it/s]
1659
  26%|██▌ | 219/851 [00:03<00:09, 69.67it/s]
1660
  27%|██▋ | 227/851 [00:03<00:08, 71.38it/s]
1661
  28%|██▊ | 235/851 [00:03<00:08, 72.05it/s]
1662
  29%|██▊ | 243/851 [00:03<00:09, 67.00it/s]
1663
  29%|██▉ | 251/851 [00:03<00:08, 69.80it/s]
1664
  30%|███ | 259/851 [00:03<00:08, 72.22it/s]
1665
  31%|███▏ | 267/851 [00:03<00:08, 72.74it/s]
1666
  32%|███▏ | 275/851 [00:03<00:07, 73.62it/s]
1667
  33%|███▎ | 283/851 [00:03<00:07, 74.61it/s]
1668
  34%|███▍ | 291/851 [00:04<00:07, 72.54it/s]
1669
  35%|███▌ | 299/851 [00:04<00:07, 74.26it/s]
1670
  36%|███▌ | 307/851 [00:04<00:07, 75.74it/s]
1671
  37%|███▋ | 315/851 [00:04<00:07, 70.27it/s]
1672
  38%|███▊ | 323/851 [00:04<00:07, 72.85it/s]
1673
  39%|███▉ | 331/851 [00:04<00:07, 72.36it/s]
1674
  40%|███▉ | 339/851 [00:04<00:07, 71.89it/s]
1675
  41%|████ | 347/851 [00:04<00:06, 72.58it/s]
1676
  42%|████▏ | 355/851 [00:04<00:07, 69.70it/s]
1677
  43%|████▎ | 363/851 [00:05<00:06, 69.83it/s]
1678
  44%|████▎ | 371/851 [00:05<00:06, 69.06it/s]
1679
  44%|████▍ | 378/851 [00:05<00:06, 69.30it/s]
1680
  45%|████▌ | 385/851 [00:05<00:06, 68.06it/s]
1681
  46%|████▌ | 393/851 [00:05<00:06, 69.52it/s]
1682
  47%|████▋ | 401/851 [00:05<00:06, 69.90it/s]
1683
  48%|████▊ | 408/851 [00:05<00:06, 64.81it/s]
1684
  49%|████▉ | 416/851 [00:05<00:06, 67.34it/s]
1685
  50%|████▉ | 424/851 [00:05<00:06, 69.53it/s]
1686
  51%|█████ | 431/851 [00:06<00:06, 68.95it/s]
1687
  52%|█████▏ | 439/851 [00:06<00:05, 71.11it/s]
1688
  53%|█████▎ | 447/851 [00:06<00:05, 70.55it/s]
1689
  53%|█████▎ | 455/851 [00:06<00:05, 71.50it/s]
1690
  54%|█████▍ | 463/851 [00:06<00:05, 70.54it/s]
1691
  55%|█████▌ | 471/851 [00:06<00:06, 61.54it/s]
1692
  56%|█████▌ | 478/851 [00:06<00:05, 63.35it/s]
1693
  57%|█████▋ | 485/851 [00:06<00:05, 63.12it/s]
1694
  58%|█████▊ | 493/851 [00:06<00:05, 66.52it/s]
1695
  59%|█████▉ | 502/851 [00:07<00:04, 70.10it/s]
1696
  60%|█████▉ | 510/851 [00:07<00:04, 69.87it/s]
1697
  61%|██████ | 518/851 [00:07<00:04, 71.30it/s]
1698
  62%|██████▏ | 526/851 [00:07<00:04, 66.37it/s]
1699
  63%|██████▎ | 534/851 [00:07<00:04, 68.15it/s]
1700
  64%|██████▎ | 542/851 [00:07<00:04, 69.51it/s]
1701
  65%|██████▍ | 550/851 [00:07<00:04, 67.52it/s]
1702
  66%|██████▌ | 558/851 [00:07<00:04, 69.41it/s]
1703
  67%|██████▋ | 566/851 [00:08<00:03, 72.24it/s]
1704
  67%|██████▋ | 574/851 [00:08<00:03, 72.87it/s]
1705
  68%|██████▊ | 582/851 [00:08<00:03, 69.95it/s]
1706
  69%|██████▉ | 590/851 [00:08<00:03, 67.24it/s]
1707
  70%|███████ | 597/851 [00:08<00:03, 67.77it/s]
1708
  71%|███████ | 605/851 [00:08<00:03, 67.61it/s]
1709
  72%|███████▏ | 612/851 [00:08<00:03, 66.37it/s]
1710
  73%|███████▎ | 619/851 [00:08<00:03, 64.78it/s]
1711
  74%|███████▎ | 626/851 [00:08<00:03, 64.41it/s]
1712
  74%|███████▍ | 633/851 [00:09<00:03, 64.56it/s]
1713
  75%|███████▌ | 640/851 [00:09<00:03, 64.37it/s]
1714
  76%|███████▌ | 647/851 [00:09<00:03, 62.56it/s]
1715
  77%|███████▋ | 655/851 [00:09<00:02, 65.73it/s]
1716
  78%|███████▊ | 662/851 [00:09<00:02, 66.42it/s]
1717
  79%|███████▊ | 670/851 [00:09<00:02, 67.98it/s]
1718
  80%|███████▉ | 678/851 [00:09<00:02, 69.04it/s]
1719
  81%|████████ | 686/851 [00:09<00:02, 69.89it/s]
1720
  82%|████████▏ | 694/851 [00:09<00:02, 71.78it/s]
1721
  82%|████████▏ | 702/851 [00:10<00:02, 72.47it/s]
1722
  83%|████████▎ | 710/851 [00:10<00:01, 73.89it/s]
1723
  84%|████████▍ | 718/851 [00:10<00:01, 70.91it/s]
1724
  85%|████████▌ | 726/851 [00:10<00:01, 70.61it/s]
1725
  86%|████████▋ | 734/851 [00:10<00:01, 72.17it/s]
1726
  87%|████████▋ | 742/851 [00:10<00:01, 73.05it/s]
1727
  88%|████████▊ | 750/851 [00:10<00:01, 72.59it/s]
1728
  89%|████████▉ | 758/851 [00:10<00:01, 73.09it/s]
1729
  90%|█████████ | 766/851 [00:10<00:01, 69.09it/s]
1730
  91%|█████████ | 774/851 [00:11<00:01, 68.91it/s]
1731
  92%|█████████▏| 781/851 [00:11<00:01, 66.57it/s]
1732
  93%|█████████▎| 788/851 [00:11<00:00, 67.34it/s]
1733
  94%|█████████▎| 796/851 [00:11<00:00, 69.12it/s]
1734
  94%|█████████▍| 804/851 [00:11<00:00, 71.53it/s]
1735
  95%|█████████▌| 812/851 [00:11<00:00, 69.03it/s]
1736
  96%|█████████▋| 820/851 [00:11<00:00, 69.99it/s]
1737
  97%|█████████▋| 828/851 [00:11<00:00, 71.40it/s]
1738
  98%|█████████▊| 836/851 [00:11<00:00, 71.06it/s]
1739
  99%|█████████▉| 844/851 [00:12<00:00, 67.57it/s]
1740
+ _warn_prf(average, modifier, msg_start, len(result))
1741
+
1742
+ ***** eval metrics *****
1743
+ epoch = 9.9971
1744
+ eval_accuracy = 0.81
1745
+ eval_f1 = 0.1759
1746
+ eval_loss = 0.6631
1747
+ eval_precision = 0.0969
1748
+ eval_recall = 0.9485
1749
+ eval_runtime = 0:00:15.81
1750
+ eval_samples = 6807
1751
+ eval_samples_per_second = 430.395
1752
+ eval_steps_per_second = 53.807
1753
+ 05/13/2024 14:58:59 - INFO - __main__ - *** Predict ***
1754
+ [INFO|trainer.py:786] 2024-05-13 14:58:59,135 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1755
+ [INFO|trainer.py:3614] 2024-05-13 14:58:59,137 >> ***** Running Prediction *****
1756
+ [INFO|trainer.py:3616] 2024-05-13 14:58:59,137 >> Num examples = 6807
1757
+ [INFO|trainer.py:3619] 2024-05-13 14:58:59,138 >> Batch size = 8
1758
+
1759
  0%| | 0/851 [00:00<?, ?it/s]
1760
  1%| | 10/851 [00:00<00:08, 94.43it/s]
1761
  2%|▏ | 20/851 [00:00<00:10, 79.64it/s]
1762
  3%|▎ | 29/851 [00:00<00:10, 75.61it/s]
1763
  4%|▍ | 37/851 [00:00<00:11, 73.82it/s]
1764
  5%|▌ | 45/851 [00:00<00:10, 73.71it/s]
1765
  6%|▌ | 53/851 [00:00<00:10, 74.31it/s]
1766
  7%|▋ | 62/851 [00:00<00:10, 75.34it/s]
1767
  8%|▊ | 70/851 [00:00<00:11, 70.02it/s]
1768
  9%|▉ | 78/851 [00:01<00:10, 71.64it/s]
1769
  10%|█ | 86/851 [00:01<00:10, 71.26it/s]
1770
  11%|█ | 94/851 [00:01<00:10, 69.42it/s]
1771
  12%|█▏ | 102/851 [00:01<00:10, 70.77it/s]
1772
  13%|█▎ | 110/851 [00:01<00:10, 69.03it/s]
1773
  14%|█▍ | 118/851 [00:01<00:10, 70.01it/s]
1774
  15%|█▍ | 126/851 [00:01<00:10, 67.17it/s]
1775
  16%|█▌ | 133/851 [00:01<00:10, 67.65it/s]
1776
  16%|█▋ | 140/851 [00:01<00:10, 67.39it/s]
1777
  17%|█▋ | 148/851 [00:02<00:10, 66.64it/s]
1778
  18%|█▊ | 156/851 [00:02<00:10, 68.76it/s]
1779
  19%|█▉ | 164/851 [00:02<00:09, 69.74it/s]
1780
  20%|██ | 172/851 [00:02<00:09, 71.66it/s]
1781
  21%|██ | 180/851 [00:02<00:09, 72.19it/s]
1782
  22%|██▏ | 188/851 [00:02<00:09, 72.67it/s]
1783
  23%|██▎ | 196/851 [00:02<00:08, 73.59it/s]
1784
  24%|██▍ | 204/851 [00:02<00:08, 75.18it/s]
1785
  25%|██▍ | 212/851 [00:02<00:09, 70.71it/s]
1786
  26%|██▌ | 220/851 [00:03<00:09, 68.53it/s]
1787
  27%|██▋ | 228/851 [00:03<00:08, 69.85it/s]
1788
  28%|██▊ | 236/851 [00:03<00:08, 69.98it/s]
1789
  29%|██▊ | 244/851 [00:03<00:09, 66.03it/s]
1790
  30%|██▉ | 252/851 [00:03<00:08, 69.13it/s]
1791
  31%|███ | 261/851 [00:03<00:08, 72.34it/s]
1792
  32%|███▏ | 269/851 [00:03<00:08, 71.00it/s]
1793
  33%|███▎ | 277/851 [00:03<00:07, 73.30it/s]
1794
  33%|███▎ | 285/851 [00:03<00:07, 74.56it/s]
1795
  34%|███▍ | 293/851 [00:04<00:07, 72.43it/s]
1796
  35%|███▌ | 301/851 [00:04<00:07, 73.31it/s]
1797
  36%|███▋ | 310/851 [00:04<00:07, 75.84it/s]
1798
  37%|███▋ | 318/851 [00:04<00:07, 71.54it/s]
1799
  38%|███▊ | 326/851 [00:04<00:07, 71.08it/s]
1800
  39%|███▉ | 334/851 [00:04<00:07, 71.61it/s]
1801
  40%|████ | 342/851 [00:04<00:07, 72.39it/s]
1802
  41%|████ | 350/851 [00:04<00:06, 71.99it/s]
1803
  42%|████▏ | 358/851 [00:05<00:07, 67.28it/s]
1804
  43%|████▎ | 366/851 [00:05<00:07, 67.95it/s]
1805
  44%|████▍ | 373/851 [00:05<00:06, 68.31it/s]
1806
  45%|████▍ | 380/851 [00:05<00:07, 66.41it/s]
1807
  46%|████▌ | 388/851 [00:05<00:06, 69.17it/s]
1808
  46%|████▋ | 395/851 [00:05<00:06, 69.34it/s]
1809
  47%|████▋ | 402/851 [00:05<00:06, 69.06it/s]
1810
  48%|████▊ | 409/851 [00:05<00:06, 65.01it/s]
1811
  49%|████▉ | 417/851 [00:05<00:06, 67.26it/s]
1812
  50%|████▉ | 425/851 [00:06<00:06, 66.34it/s]
1813
  51%|█████ | 433/851 [00:06<00:06, 68.38it/s]
1814
  52%|█████▏ | 441/851 [00:06<00:05, 70.44it/s]
1815
  53%|█████▎ | 449/851 [00:06<00:05, 70.15it/s]
1816
  54%|█████▎ | 457/851 [00:06<00:05, 71.36it/s]
1817
  55%|█████▍ | 465/851 [00:06<00:05, 68.14it/s]
1818
  55%|█████▌ | 472/851 [00:06<00:06, 61.77it/s]
1819
  56%|█████▋ | 479/851 [00:06<00:05, 63.68it/s]
1820
  57%|█████▋ | 486/851 [00:06<00:05, 62.95it/s]
1821
  58%|█████▊ | 494/851 [00:07<00:05, 66.26it/s]
1822
  59%|█████▉ | 502/851 [00:07<00:05, 69.52it/s]
1823
  60%|█████▉ | 510/851 [00:07<00:04, 69.09it/s]
1824
  61%|██████ | 518/851 [00:07<00:04, 70.28it/s]
1825
  62%|██████▏ | 526/851 [00:07<00:04, 65.43it/s]
1826
  63%|██████▎ | 534/851 [00:07<00:04, 67.57it/s]
1827
  64%|██████▎ | 542/851 [00:07<00:04, 69.07it/s]
1828
  65%|██████▍ | 549/851 [00:07<00:04, 66.40it/s]
1829
  65%|██████▌ | 557/851 [00:07<00:04, 68.83it/s]
1830
  67%|██████▋ | 566/851 [00:08<00:03, 72.25it/s]
1831
  67%|██████▋ | 574/851 [00:08<00:03, 71.57it/s]
1832
  68%|██████▊ | 582/851 [00:08<00:03, 69.26it/s]
1833
  69%|██████▉ | 589/851 [00:08<00:03, 66.06it/s]
1834
  70%|███████ | 596/851 [00:08<00:03, 67.01it/s]
1835
  71%|███████ | 604/851 [00:08<00:03, 68.21it/s]
1836
  72%|███████▏ | 611/851 [00:08<00:03, 67.43it/s]
1837
  73%|███████▎ | 618/851 [00:08<00:03, 63.33it/s]
1838
  74%|███████▎ | 626/851 [00:09<00:03, 64.04it/s]
1839
  74%|███████▍ | 633/851 [00:09<00:03, 63.85it/s]
1840
  75%|███████▌ | 640/851 [00:09<00:03, 64.00it/s]
1841
  76%|███████▌ | 647/851 [00:09<00:03, 62.36it/s]
1842
  77%|███████▋ | 655/851 [00:09<00:02, 65.58it/s]
1843
  78%|███████▊ | 662/851 [00:09<00:02, 66.21it/s]
1844
  79%|███████▊ | 669/851 [00:09<00:02, 67.09it/s]
1845
  79%|███████▉ | 676/851 [00:09<00:02, 66.93it/s]
1846
  80%|████████ | 683/851 [00:09<00:02, 67.45it/s]
1847
  81%|████████ | 691/851 [00:09<00:02, 70.83it/s]
1848
  82%|████████▏ | 699/851 [00:10<00:02, 70.91it/s]
1849
  83%|████████▎ | 708/851 [00:10<00:01, 74.14it/s]
1850
  84%|████████▍ | 716/851 [00:10<00:01, 71.95it/s]
1851
  85%|████████▌ | 724/851 [00:10<00:01, 72.30it/s]
1852
  86%|████████▌ | 732/851 [00:10<00:01, 73.78it/s]
1853
  87%|████████▋ | 740/851 [00:10<00:01, 73.29it/s]
1854
  88%|████████▊ | 748/851 [00:10<00:01, 72.54it/s]
1855
  89%|████████▉ | 756/851 [00:10<00:01, 72.39it/s]
1856
  90%|████████▉ | 764/851 [00:10<00:01, 73.41it/s]
1857
  91%|█████████ | 772/851 [00:11<00:01, 68.50it/s]
1858
  92%|█████████▏| 779/851 [00:11<00:01, 66.70it/s]
1859
  92%|█████████▏| 786/851 [00:11<00:00, 67.37it/s]
1860
  93%|█████████▎| 794/851 [00:11<00:00, 68.93it/s]
1861
  94%|█████████▍| 802/851 [00:11<00:00, 71.12it/s]
1862
  95%|█████████▌| 810/851 [00:11<00:00, 68.26it/s]
1863
  96%|█████████▌| 818/851 [00:11<00:00, 69.10it/s]
1864
  97%|█████████▋| 826/851 [00:11<00:00, 70.24it/s]
1865
  98%|█████████▊| 834/851 [00:11<00:00, 69.55it/s]
1866
  99%|█████████▉| 841/851 [00:12<00:00, 69.21it/s]
1867
+ [INFO|trainer.py:3305] 2024-05-13 14:59:15,354 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1868
+ [INFO|configuration_utils.py:471] 2024-05-13 14:59:15,355 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1869
+ [INFO|modeling_utils.py:2590] 2024-05-13 14:59:16,582 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1870
+ [INFO|tokenization_utils_base.py:2488] 2024-05-13 14:59:16,583 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1871
+ [INFO|tokenization_utils_base.py:2497] 2024-05-13 14:59:16,584 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1872
+ ***** predict metrics *****
1873
+ predict_accuracy = 0.81
1874
+ predict_f1 = 0.1759
1875
+ predict_loss = 0.6631
1876
+ predict_precision = 0.0969
1877
+ predict_recall = 0.9485
1878
+ predict_runtime = 0:00:15.89
1879
+ predict_samples_per_second = 428.156
1880
+ predict_steps_per_second = 53.527
1881
+
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.997061416397296,
3
+ "total_flos": 6700722040732752.0,
4
+ "train_loss": 0.08913132946046923,
5
+ "train_runtime": 3337.712,
6
+ "train_samples": 27224,
7
+ "train_samples_per_second": 81.565,
8
+ "train_steps_per_second": 5.096
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.17586912065439672,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3403",
4
+ "epoch": 9.997061416397296,
5
+ "eval_steps": 500,
6
+ "global_step": 17010,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.29385836027034967,
13
+ "grad_norm": 1.9668159484863281,
14
+ "learning_rate": 4.853027630805409e-05,
15
+ "loss": 0.4174,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.5877167205406993,
20
+ "grad_norm": 3.246731758117676,
21
+ "learning_rate": 4.7060552616108174e-05,
22
+ "loss": 0.2765,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.8815750808110491,
27
+ "grad_norm": 2.936720609664917,
28
+ "learning_rate": 4.559082892416226e-05,
29
+ "loss": 0.2596,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.9997061416397296,
34
+ "eval_accuracy": 0.7671626010120082,
35
+ "eval_f1": 0.14622946114658822,
36
+ "eval_loss": 0.7913026213645935,
37
+ "eval_precision": 0.0793057825511713,
38
+ "eval_recall": 0.9365808823529411,
39
+ "eval_runtime": 16.052,
40
+ "eval_samples_per_second": 424.06,
41
+ "eval_steps_per_second": 53.015,
42
+ "step": 1701
43
+ },
44
+ {
45
+ "epoch": 1.1754334410813987,
46
+ "grad_norm": 2.302980661392212,
47
+ "learning_rate": 4.4121105232216346e-05,
48
+ "loss": 0.2135,
49
+ "step": 2000
50
+ },
51
+ {
52
+ "epoch": 1.4692918013517484,
53
+ "grad_norm": 3.7150967121124268,
54
+ "learning_rate": 4.265138154027043e-05,
55
+ "loss": 0.1839,
56
+ "step": 2500
57
+ },
58
+ {
59
+ "epoch": 1.7631501616220981,
60
+ "grad_norm": 1.2019191980361938,
61
+ "learning_rate": 4.118165784832452e-05,
62
+ "loss": 0.1853,
63
+ "step": 3000
64
+ },
65
+ {
66
+ "epoch": 2.0,
67
+ "eval_accuracy": 0.8099635429897495,
68
+ "eval_f1": 0.17586912065439672,
69
+ "eval_loss": 0.6631014347076416,
70
+ "eval_precision": 0.09691960931630353,
71
+ "eval_recall": 0.9485294117647058,
72
+ "eval_runtime": 15.875,
73
+ "eval_samples_per_second": 428.788,
74
+ "eval_steps_per_second": 53.606,
75
+ "step": 3403
76
+ },
77
+ {
78
+ "epoch": 2.0570085218924477,
79
+ "grad_norm": 2.090308904647827,
80
+ "learning_rate": 3.971193415637861e-05,
81
+ "loss": 0.1684,
82
+ "step": 3500
83
+ },
84
+ {
85
+ "epoch": 2.3508668821627974,
86
+ "grad_norm": 0.9995286464691162,
87
+ "learning_rate": 3.824221046443269e-05,
88
+ "loss": 0.1222,
89
+ "step": 4000
90
+ },
91
+ {
92
+ "epoch": 2.644725242433147,
93
+ "grad_norm": 1.3526027202606201,
94
+ "learning_rate": 3.677248677248677e-05,
95
+ "loss": 0.1277,
96
+ "step": 4500
97
+ },
98
+ {
99
+ "epoch": 2.938583602703497,
100
+ "grad_norm": 2.2816500663757324,
101
+ "learning_rate": 3.530276308054086e-05,
102
+ "loss": 0.1254,
103
+ "step": 5000
104
+ },
105
+ {
106
+ "epoch": 2.9997061416397295,
107
+ "eval_accuracy": 0.7754975935626944,
108
+ "eval_f1": 0.16528259292106748,
109
+ "eval_loss": 1.072888731956482,
110
+ "eval_precision": 0.0905877154220062,
111
+ "eval_recall": 0.9420955882352942,
112
+ "eval_runtime": 15.8836,
113
+ "eval_samples_per_second": 428.556,
114
+ "eval_steps_per_second": 53.577,
115
+ "step": 5104
116
+ },
117
+ {
118
+ "epoch": 3.2324419629738466,
119
+ "grad_norm": 0.6440290808677673,
120
+ "learning_rate": 3.3833039388594945e-05,
121
+ "loss": 0.0929,
122
+ "step": 5500
123
+ },
124
+ {
125
+ "epoch": 3.5263003232441963,
126
+ "grad_norm": 3.7906153202056885,
127
+ "learning_rate": 3.2363315696649034e-05,
128
+ "loss": 0.0896,
129
+ "step": 6000
130
+ },
131
+ {
132
+ "epoch": 3.820158683514546,
133
+ "grad_norm": 1.0953032970428467,
134
+ "learning_rate": 3.0893592004703116e-05,
135
+ "loss": 0.0823,
136
+ "step": 6500
137
+ },
138
+ {
139
+ "epoch": 4.0,
140
+ "eval_accuracy": 0.7719411469883488,
141
+ "eval_f1": 0.16243814311523055,
142
+ "eval_loss": 1.2567578554153442,
143
+ "eval_precision": 0.08880872627329726,
144
+ "eval_recall": 0.9503676470588235,
145
+ "eval_runtime": 15.8976,
146
+ "eval_samples_per_second": 428.179,
147
+ "eval_steps_per_second": 53.53,
148
+ "step": 6806
149
+ },
150
+ {
151
+ "epoch": 4.114017043784895,
152
+ "grad_norm": 1.7823286056518555,
153
+ "learning_rate": 2.9423868312757202e-05,
154
+ "loss": 0.0761,
155
+ "step": 7000
156
+ },
157
+ {
158
+ "epoch": 4.407875404055245,
159
+ "grad_norm": 1.165720820426941,
160
+ "learning_rate": 2.795414462081129e-05,
161
+ "loss": 0.0603,
162
+ "step": 7500
163
+ },
164
+ {
165
+ "epoch": 4.701733764325595,
166
+ "grad_norm": 4.1130452156066895,
167
+ "learning_rate": 2.648442092886537e-05,
168
+ "loss": 0.0589,
169
+ "step": 8000
170
+ },
171
+ {
172
+ "epoch": 4.9955921245959445,
173
+ "grad_norm": 0.3957385718822479,
174
+ "learning_rate": 2.501469723691946e-05,
175
+ "loss": 0.0597,
176
+ "step": 8500
177
+ },
178
+ {
179
+ "epoch": 4.99970614163973,
180
+ "eval_accuracy": 0.7836540772119656,
181
+ "eval_f1": 0.17101181993461315,
182
+ "eval_loss": 1.1907650232315063,
183
+ "eval_precision": 0.0940872613227562,
184
+ "eval_recall": 0.9375,
185
+ "eval_runtime": 16.019,
186
+ "eval_samples_per_second": 424.932,
187
+ "eval_steps_per_second": 53.124,
188
+ "step": 8507
189
+ },
190
+ {
191
+ "epoch": 5.289450484866294,
192
+ "grad_norm": 1.0087623596191406,
193
+ "learning_rate": 2.3544973544973546e-05,
194
+ "loss": 0.0423,
195
+ "step": 9000
196
+ },
197
+ {
198
+ "epoch": 5.583308845136644,
199
+ "grad_norm": 2.162200450897217,
200
+ "learning_rate": 2.2075249853027632e-05,
201
+ "loss": 0.043,
202
+ "step": 9500
203
+ },
204
+ {
205
+ "epoch": 5.877167205406994,
206
+ "grad_norm": 1.1820895671844482,
207
+ "learning_rate": 2.0605526161081718e-05,
208
+ "loss": 0.0446,
209
+ "step": 10000
210
+ },
211
+ {
212
+ "epoch": 6.0,
213
+ "eval_accuracy": 0.7811686840461102,
214
+ "eval_f1": 0.1718036055495555,
215
+ "eval_loss": 1.384422779083252,
216
+ "eval_precision": 0.09443784820531555,
217
+ "eval_recall": 0.9503676470588235,
218
+ "eval_runtime": 15.9006,
219
+ "eval_samples_per_second": 428.098,
220
+ "eval_steps_per_second": 53.52,
221
+ "step": 10209
222
+ },
223
+ {
224
+ "epoch": 6.171025565677343,
225
+ "grad_norm": 2.190476894378662,
226
+ "learning_rate": 1.91358024691358e-05,
227
+ "loss": 0.0333,
228
+ "step": 10500
229
+ },
230
+ {
231
+ "epoch": 6.464883925947693,
232
+ "grad_norm": 1.5180469751358032,
233
+ "learning_rate": 1.766607877718989e-05,
234
+ "loss": 0.0333,
235
+ "step": 11000
236
+ },
237
+ {
238
+ "epoch": 6.758742286218043,
239
+ "grad_norm": 1.4279770851135254,
240
+ "learning_rate": 1.6196355085243976e-05,
241
+ "loss": 0.0325,
242
+ "step": 11500
243
+ },
244
+ {
245
+ "epoch": 6.99970614163973,
246
+ "eval_accuracy": 0.7866406684471785,
247
+ "eval_f1": 0.1704836709384043,
248
+ "eval_loss": 1.5515447854995728,
249
+ "eval_precision": 0.09366766603070772,
250
+ "eval_recall": 0.9476102941176471,
251
+ "eval_runtime": 15.8267,
252
+ "eval_samples_per_second": 430.095,
253
+ "eval_steps_per_second": 53.77,
254
+ "step": 11910
255
+ },
256
+ {
257
+ "epoch": 7.052600646488393,
258
+ "grad_norm": 0.6558970212936401,
259
+ "learning_rate": 1.472663139329806e-05,
260
+ "loss": 0.0289,
261
+ "step": 12000
262
+ },
263
+ {
264
+ "epoch": 7.346459006758742,
265
+ "grad_norm": 0.26189878582954407,
266
+ "learning_rate": 1.3256907701352148e-05,
267
+ "loss": 0.0224,
268
+ "step": 12500
269
+ },
270
+ {
271
+ "epoch": 7.640317367029092,
272
+ "grad_norm": 1.370686650276184,
273
+ "learning_rate": 1.1787184009406232e-05,
274
+ "loss": 0.0231,
275
+ "step": 13000
276
+ },
277
+ {
278
+ "epoch": 7.934175727299442,
279
+ "grad_norm": 0.36619672179222107,
280
+ "learning_rate": 1.0317460317460318e-05,
281
+ "loss": 0.022,
282
+ "step": 13500
283
+ },
284
+ {
285
+ "epoch": 8.0,
286
+ "eval_accuracy": 0.7842582611859856,
287
+ "eval_f1": 0.1688722903304376,
288
+ "eval_loss": 1.6299601793289185,
289
+ "eval_precision": 0.09261733012734882,
290
+ "eval_recall": 0.9558823529411765,
291
+ "eval_runtime": 15.9057,
292
+ "eval_samples_per_second": 427.961,
293
+ "eval_steps_per_second": 53.503,
294
+ "step": 13612
295
+ },
296
+ {
297
+ "epoch": 8.22803408756979,
298
+ "grad_norm": 1.0557399988174438,
299
+ "learning_rate": 8.847736625514404e-06,
300
+ "loss": 0.0164,
301
+ "step": 14000
302
+ },
303
+ {
304
+ "epoch": 8.521892447840141,
305
+ "grad_norm": 4.32920503616333,
306
+ "learning_rate": 7.37801293356849e-06,
307
+ "loss": 0.0164,
308
+ "step": 14500
309
+ },
310
+ {
311
+ "epoch": 8.81575080811049,
312
+ "grad_norm": 0.1674884408712387,
313
+ "learning_rate": 5.908289241622575e-06,
314
+ "loss": 0.017,
315
+ "step": 15000
316
+ },
317
+ {
318
+ "epoch": 8.999706141639729,
319
+ "eval_accuracy": 0.7844848301762433,
320
+ "eval_f1": 0.16934759532946844,
321
+ "eval_loss": 1.7459304332733154,
322
+ "eval_precision": 0.09292947396720136,
323
+ "eval_recall": 0.953125,
324
+ "eval_runtime": 16.0915,
325
+ "eval_samples_per_second": 423.018,
326
+ "eval_steps_per_second": 52.885,
327
+ "step": 15313
328
+ },
329
+ {
330
+ "epoch": 9.10960916838084,
331
+ "grad_norm": 3.3734261989593506,
332
+ "learning_rate": 4.438565549676661e-06,
333
+ "loss": 0.0171,
334
+ "step": 15500
335
+ },
336
+ {
337
+ "epoch": 9.40346752865119,
338
+ "grad_norm": 0.19828377664089203,
339
+ "learning_rate": 2.9688418577307467e-06,
340
+ "loss": 0.013,
341
+ "step": 16000
342
+ },
343
+ {
344
+ "epoch": 9.69732588892154,
345
+ "grad_norm": 1.491190791130066,
346
+ "learning_rate": 1.4991181657848325e-06,
347
+ "loss": 0.0133,
348
+ "step": 16500
349
+ },
350
+ {
351
+ "epoch": 9.991184249191889,
352
+ "grad_norm": 2.5641109943389893,
353
+ "learning_rate": 2.9394473838918286e-08,
354
+ "loss": 0.0135,
355
+ "step": 17000
356
+ },
357
+ {
358
+ "epoch": 9.997061416397296,
359
+ "eval_accuracy": 0.7845534874460183,
360
+ "eval_f1": 0.16896354888689555,
361
+ "eval_loss": 1.7860842943191528,
362
+ "eval_precision": 0.09270693512304251,
363
+ "eval_recall": 0.9522058823529411,
364
+ "eval_runtime": 15.9319,
365
+ "eval_samples_per_second": 427.256,
366
+ "eval_steps_per_second": 53.415,
367
+ "step": 17010
368
+ },
369
+ {
370
+ "epoch": 9.997061416397296,
371
+ "step": 17010,
372
+ "total_flos": 6700722040732752.0,
373
+ "train_loss": 0.08913132946046923,
374
+ "train_runtime": 3337.712,
375
+ "train_samples_per_second": 81.565,
376
+ "train_steps_per_second": 5.096
377
+ }
378
+ ],
379
+ "logging_steps": 500,
380
+ "max_steps": 17010,
381
+ "num_input_tokens_seen": 0,
382
+ "num_train_epochs": 10,
383
+ "save_steps": 500,
384
+ "total_flos": 6700722040732752.0,
385
+ "train_batch_size": 4,
386
+ "trial_name": null,
387
+ "trial_params": null
388
+ }