---
datasets:
- pszemraj/scientific_lay_summarisation-plos-norm
metrics:
- bleu
- rouge
pipeline_tag: summarization
---
|
# Hyperparameters

learning_rate=2e-5

per_device_train_batch_size=14

per_device_eval_batch_size=14

weight_decay=0.01

save_total_limit=3

num_train_epochs=3

predict_with_generate=True

fp16=True
|
|
|
# Training Output

global_step=4248,

training_loss=2.930363613782405,

metrics={'train_runtime': 11857.8062,

'train_samples_per_second': 5.014,

'train_steps_per_second': 0.358,

'total_flos': 1.3114345819786445e+17,

'train_loss': 2.930363613782405,

'epoch': 3.0}
|
|
|
# Training Results

| Epoch | Training Loss | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Bleu | Gen Len |
|:------|:--------------|:----------------|:-------|:-------|:-------|:----------|:-----|:-----------|
| 1 | 3.095400 | 2.864138 | 0.425500 | 0.139000 | 0.246300 | 0.246300 | 0.541400 | 141.540900 |
| 2 | 2.876500 | 2.811244 | 0.425600 | 0.139100 | 0.246500 | 0.246400 | 0.541600 | 141.619000 |
| 3 | 2.748300 | 2.797923 | 0.425800 | 0.138700 | 0.246400 | 0.246300 | 0.541800 | 141.597000 |