navjordj commited on
Commit
5688450
1 Parent(s): 9c684a6

End of training

Browse files
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
- "epoch": 10.0,
3
- "predict_gen_len": 45.68034889687019,
4
- "predict_loss": 2.10367488861084,
5
- "predict_rouge1": 31.8781,
6
- "predict_rouge2": 14.0329,
7
- "predict_rougeL": 27.5321,
8
- "predict_rougeLsum": 29.6372,
9
- "predict_runtime": 175.8488,
10
  "predict_samples": 1949,
11
- "predict_samples_per_second": 11.083,
12
  "predict_steps_per_second": 0.347,
13
- "train_loss": 2.6604936368203576,
14
- "train_runtime": 4208.1526,
15
  "train_samples": 11044,
16
- "train_samples_per_second": 26.244,
17
- "train_steps_per_second": 0.411
18
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "predict_gen_len": 45.74653668547973,
4
+ "predict_loss": 2.085327386856079,
5
+ "predict_rouge1": 32.5094,
6
+ "predict_rouge2": 14.5115,
7
+ "predict_rougeL": 28.0616,
8
+ "predict_rougeLsum": 30.2293,
9
+ "predict_runtime": 175.8133,
10
  "predict_samples": 1949,
11
+ "predict_samples_per_second": 11.086,
12
  "predict_steps_per_second": 0.347,
13
+ "train_loss": 1.2214314234739094,
14
+ "train_runtime": 4297.3848,
15
  "train_samples": 11044,
16
+ "train_samples_per_second": 51.399,
17
+ "train_steps_per_second": 0.805
18
  }
generated_predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
generation_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_from_model_config": true,
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
  "pad_token_id": 0,
 
1
  {
 
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
predict_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "predict_gen_len": 45.68034889687019,
3
- "predict_loss": 2.10367488861084,
4
- "predict_rouge1": 31.8781,
5
- "predict_rouge2": 14.0329,
6
- "predict_rougeL": 27.5321,
7
- "predict_rougeLsum": 29.6372,
8
- "predict_runtime": 175.8488,
9
  "predict_samples": 1949,
10
- "predict_samples_per_second": 11.083,
11
  "predict_steps_per_second": 0.347
12
  }
 
1
  {
2
+ "predict_gen_len": 45.74653668547973,
3
+ "predict_loss": 2.085327386856079,
4
+ "predict_rouge1": 32.5094,
5
+ "predict_rouge2": 14.5115,
6
+ "predict_rougeL": 28.0616,
7
+ "predict_rougeLsum": 30.2293,
8
+ "predict_runtime": 175.8133,
9
  "predict_samples": 1949,
10
+ "predict_samples_per_second": 11.086,
11
  "predict_steps_per_second": 0.347
12
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "train_loss": 2.6604936368203576,
4
- "train_runtime": 4208.1526,
5
  "train_samples": 11044,
6
- "train_samples_per_second": 26.244,
7
- "train_steps_per_second": 0.411
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "train_loss": 1.2214314234739094,
4
+ "train_runtime": 4297.3848,
5
  "train_samples": 11044,
6
+ "train_samples_per_second": 51.399,
7
+ "train_steps_per_second": 0.805
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "global_step": 1730,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -26,18 +26,120 @@
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 10.0,
30
- "step": 1730,
31
- "total_flos": 2.648315673797714e+17,
32
- "train_loss": 2.6604936368203576,
33
- "train_runtime": 4208.1526,
34
- "train_samples_per_second": 26.244,
35
- "train_steps_per_second": 0.411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
- "max_steps": 1730,
39
- "num_train_epochs": 10,
40
- "total_flos": 2.648315673797714e+17,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
+ "global_step": 3460,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 10.4,
30
+ "learning_rate": 2.3988439306358382e-05,
31
+ "loss": 2.4779,
32
+ "step": 1800
33
+ },
34
+ {
35
+ "epoch": 10.98,
36
+ "learning_rate": 2.254335260115607e-05,
37
+ "loss": 2.4677,
38
+ "step": 1900
39
+ },
40
+ {
41
+ "epoch": 11.56,
42
+ "learning_rate": 2.1098265895953757e-05,
43
+ "loss": 2.4479,
44
+ "step": 2000
45
+ },
46
+ {
47
+ "epoch": 12.14,
48
+ "learning_rate": 1.9653179190751446e-05,
49
+ "loss": 2.4503,
50
+ "step": 2100
51
+ },
52
+ {
53
+ "epoch": 12.72,
54
+ "learning_rate": 1.8208092485549132e-05,
55
+ "loss": 2.4222,
56
+ "step": 2200
57
+ },
58
+ {
59
+ "epoch": 13.29,
60
+ "learning_rate": 1.676300578034682e-05,
61
+ "loss": 2.4106,
62
+ "step": 2300
63
+ },
64
+ {
65
+ "epoch": 13.87,
66
+ "learning_rate": 1.531791907514451e-05,
67
+ "loss": 2.4054,
68
+ "step": 2400
69
+ },
70
+ {
71
+ "epoch": 14.45,
72
+ "learning_rate": 1.3872832369942197e-05,
73
+ "loss": 2.4101,
74
+ "step": 2500
75
+ },
76
+ {
77
+ "epoch": 15.03,
78
+ "learning_rate": 1.2427745664739884e-05,
79
+ "loss": 2.3919,
80
+ "step": 2600
81
+ },
82
+ {
83
+ "epoch": 15.61,
84
+ "learning_rate": 1.0982658959537573e-05,
85
+ "loss": 2.3841,
86
+ "step": 2700
87
+ },
88
+ {
89
+ "epoch": 16.18,
90
+ "learning_rate": 9.53757225433526e-06,
91
+ "loss": 2.377,
92
+ "step": 2800
93
+ },
94
+ {
95
+ "epoch": 16.76,
96
+ "learning_rate": 8.092485549132949e-06,
97
+ "loss": 2.3716,
98
+ "step": 2900
99
+ },
100
+ {
101
+ "epoch": 17.34,
102
+ "learning_rate": 6.647398843930635e-06,
103
+ "loss": 2.3867,
104
+ "step": 3000
105
+ },
106
+ {
107
+ "epoch": 17.92,
108
+ "learning_rate": 5.202312138728324e-06,
109
+ "loss": 2.3632,
110
+ "step": 3100
111
+ },
112
+ {
113
+ "epoch": 18.5,
114
+ "learning_rate": 3.757225433526012e-06,
115
+ "loss": 2.3578,
116
+ "step": 3200
117
+ },
118
+ {
119
+ "epoch": 19.08,
120
+ "learning_rate": 2.3121387283236993e-06,
121
+ "loss": 2.3616,
122
+ "step": 3300
123
+ },
124
+ {
125
+ "epoch": 19.65,
126
+ "learning_rate": 8.670520231213873e-07,
127
+ "loss": 2.3635,
128
+ "step": 3400
129
+ },
130
+ {
131
+ "epoch": 20.0,
132
+ "step": 3460,
133
+ "total_flos": 5.296559685853962e+17,
134
+ "train_loss": 1.2214314234739094,
135
+ "train_runtime": 4297.3848,
136
+ "train_samples_per_second": 51.399,
137
+ "train_steps_per_second": 0.805
138
  }
139
  ],
140
+ "max_steps": 3460,
141
+ "num_train_epochs": 20,
142
+ "total_flos": 5.296559685853962e+17,
143
  "trial_name": null,
144
  "trial_params": null
145
  }