MahmoudIbrahim committed on
Commit
cf1e3a4
·
verified ·
1 Parent(s): fa5a7da

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +51 -51
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.012416190712689347,
5
  "eval_steps": 500,
6
  "global_step": 150,
7
  "is_hyper_param_search": false,
@@ -9,108 +9,108 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0008277460475126231,
13
- "grad_norm": 0.6720314025878906,
14
- "learning_rate": 0.00019448275862068965,
15
- "loss": 2.806,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.0016554920950252463,
20
- "grad_norm": 0.7282748818397522,
21
- "learning_rate": 0.00018068965517241382,
22
- "loss": 2.4015,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.0024832381425378696,
27
- "grad_norm": 0.6224257349967957,
28
- "learning_rate": 0.00016689655172413793,
29
- "loss": 2.4062,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.0033109841900504926,
34
- "grad_norm": 0.6465514898300171,
35
- "learning_rate": 0.00015310344827586207,
36
- "loss": 2.3612,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.004138730237563116,
41
- "grad_norm": 0.5610107183456421,
42
  "learning_rate": 0.0001393103448275862,
43
- "loss": 2.1203,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.004966476285075739,
48
- "grad_norm": 0.5969945192337036,
49
  "learning_rate": 0.00012551724137931035,
50
- "loss": 2.2384,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.005794222332588362,
55
- "grad_norm": 0.6113339066505432,
56
  "learning_rate": 0.00011172413793103449,
57
- "loss": 2.3128,
58
  "step": 70
59
  },
60
  {
61
- "epoch": 0.006621968380100985,
62
- "grad_norm": 0.8051493167877197,
63
  "learning_rate": 9.793103448275862e-05,
64
- "loss": 2.2082,
65
  "step": 80
66
  },
67
  {
68
- "epoch": 0.007449714427613608,
69
- "grad_norm": 0.6741610169410706,
70
  "learning_rate": 8.413793103448277e-05,
71
- "loss": 2.3156,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 0.008277460475126232,
76
- "grad_norm": 0.5629040598869324,
77
  "learning_rate": 7.03448275862069e-05,
78
- "loss": 2.1989,
79
  "step": 100
80
  },
81
  {
82
- "epoch": 0.009105206522638855,
83
- "grad_norm": 0.7672610282897949,
84
  "learning_rate": 5.6551724137931037e-05,
85
- "loss": 2.1749,
86
  "step": 110
87
  },
88
  {
89
- "epoch": 0.009932952570151478,
90
- "grad_norm": 0.5322269201278687,
91
  "learning_rate": 4.275862068965518e-05,
92
- "loss": 2.171,
93
  "step": 120
94
  },
95
  {
96
- "epoch": 0.010760698617664101,
97
- "grad_norm": 0.7353241443634033,
98
  "learning_rate": 2.8965517241379313e-05,
99
- "loss": 2.1584,
100
  "step": 130
101
  },
102
  {
103
- "epoch": 0.011588444665176724,
104
- "grad_norm": 0.6774106025695801,
105
  "learning_rate": 1.5172413793103448e-05,
106
- "loss": 2.2079,
107
  "step": 140
108
  },
109
  {
110
- "epoch": 0.012416190712689347,
111
- "grad_norm": 0.5964234471321106,
112
  "learning_rate": 1.3793103448275862e-06,
113
- "loss": 2.1165,
114
  "step": 150
115
  }
116
  ],
@@ -131,7 +131,7 @@
131
  "attributes": {}
132
  }
133
  },
134
- "total_flos": 5.120470461579264e+16,
135
  "train_batch_size": 2,
136
  "trial_name": null,
137
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.010260970687827069,
5
  "eval_steps": 500,
6
  "global_step": 150,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0006840647125218045,
13
+ "grad_norm": 0.6777583360671997,
14
+ "learning_rate": 0.0001931034482758621,
15
+ "loss": 2.5631,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.001368129425043609,
20
+ "grad_norm": 0.529003918170929,
21
+ "learning_rate": 0.0001793103448275862,
22
+ "loss": 2.2202,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.0020521941375654137,
27
+ "grad_norm": 0.6383516192436218,
28
+ "learning_rate": 0.00016551724137931035,
29
+ "loss": 2.2541,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.002736258850087218,
34
+ "grad_norm": 0.547764778137207,
35
+ "learning_rate": 0.00015172413793103449,
36
+ "loss": 2.1831,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.003420323562609023,
41
+ "grad_norm": null,
42
  "learning_rate": 0.0001393103448275862,
43
+ "loss": 2.2025,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.0041043882751308275,
48
+ "grad_norm": 0.6107162237167358,
49
  "learning_rate": 0.00012551724137931035,
50
+ "loss": 2.1387,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 0.004788452987652632,
55
+ "grad_norm": 0.5852159261703491,
56
  "learning_rate": 0.00011172413793103449,
57
+ "loss": 2.1362,
58
  "step": 70
59
  },
60
  {
61
+ "epoch": 0.005472517700174436,
62
+ "grad_norm": 0.587351381778717,
63
  "learning_rate": 9.793103448275862e-05,
64
+ "loss": 2.1783,
65
  "step": 80
66
  },
67
  {
68
+ "epoch": 0.006156582412696241,
69
+ "grad_norm": 0.4379996657371521,
70
  "learning_rate": 8.413793103448277e-05,
71
+ "loss": 2.0619,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 0.006840647125218046,
76
+ "grad_norm": 0.6348068714141846,
77
  "learning_rate": 7.03448275862069e-05,
78
+ "loss": 2.1559,
79
  "step": 100
80
  },
81
  {
82
+ "epoch": 0.0075247118377398505,
83
+ "grad_norm": 0.5908586382865906,
84
  "learning_rate": 5.6551724137931037e-05,
85
+ "loss": 2.0957,
86
  "step": 110
87
  },
88
  {
89
+ "epoch": 0.008208776550261655,
90
+ "grad_norm": 0.6663537621498108,
91
  "learning_rate": 4.275862068965518e-05,
92
+ "loss": 2.2033,
93
  "step": 120
94
  },
95
  {
96
+ "epoch": 0.008892841262783458,
97
+ "grad_norm": 0.5192745923995972,
98
  "learning_rate": 2.8965517241379313e-05,
99
+ "loss": 2.0376,
100
  "step": 130
101
  },
102
  {
103
+ "epoch": 0.009576905975305264,
104
+ "grad_norm": 0.6954190731048584,
105
  "learning_rate": 1.5172413793103448e-05,
106
+ "loss": 2.108,
107
  "step": 140
108
  },
109
  {
110
+ "epoch": 0.010260970687827069,
111
+ "grad_norm": 0.6196115016937256,
112
  "learning_rate": 1.3793103448275862e-06,
113
+ "loss": 1.9693,
114
  "step": 150
115
  }
116
  ],
 
131
  "attributes": {}
132
  }
133
  },
134
+ "total_flos": 5.164445885681664e+16,
135
  "train_batch_size": 2,
136
  "trial_name": null,
137
  "trial_params": null