AmberYifan commited on
Commit
06301e9
·
verified ·
1 Parent(s): 56ac09e

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7470545699963203,
5
- "train_runtime": 930.2413,
6
  "train_samples": 2484,
7
- "train_samples_per_second": 2.67,
8
- "train_steps_per_second": 0.084
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7437324760816036,
5
+ "train_runtime": 1103.4698,
6
  "train_samples": 2484,
7
+ "train_samples_per_second": 2.251,
8
+ "train_steps_per_second": 0.071
9
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:684bdb27c40770cf3cddf81911175143ce9a1b30623967ed1f37a205e01501cb
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92113357029c867f634c8a73dd82a58a5de916fc2ad25a88d2f242e94fab8678
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16a83d750d00de4c43911d14230c7cabcdef1d11399abe8950f5fe6eec28bfd8
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ab47564d3b43114c1c7d1733184beb0817feaea0ab9c7eadc2e28c2a57b94d
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8111c6fab11befea669f8bfbb233fdc1a15e0ece5ff7a61267b5b8a957ff5330
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82e6c9c65d0bd2d0fd8aa827bd906ab44e7047f59ff3efed7e515b46ce049d99
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7afdab785183d4921848c4d75fadb9aa7f29ebb31a374667b269420fd23bfd6
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f461197be3ca75b615cc2a1b55bee0eb4ee17bff23dc1bf01a3a24a0903c4e65
3
  size 1089994880
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7470545699963203,
5
- "train_runtime": 930.2413,
6
  "train_samples": 2484,
7
- "train_samples_per_second": 2.67,
8
- "train_steps_per_second": 0.084
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7437324760816036,
5
+ "train_runtime": 1103.4698,
6
  "train_samples": 2484,
7
+ "train_samples_per_second": 2.251,
8
+ "train_steps_per_second": 0.071
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.01282051282051282,
13
- "grad_norm": 101.89968076534468,
14
  "learning_rate": 6.25e-08,
15
  "logits/generated": -1.0679720640182495,
16
  "logits/real": -0.826486349105835,
@@ -25,117 +25,117 @@
25
  },
26
  {
27
  "epoch": 0.1282051282051282,
28
- "grad_norm": 143.51099887453523,
29
  "learning_rate": 4.857142857142857e-07,
30
- "logits/generated": -0.8684251308441162,
31
- "logits/real": -0.6457947492599487,
32
- "logps/generated": -273.63232421875,
33
- "logps/real": -274.0804748535156,
34
- "loss": 0.9133,
35
- "rewards/accuracies": 0.625,
36
- "rewards/generated": 0.16648532450199127,
37
- "rewards/margins": 0.10824514925479889,
38
- "rewards/real": 0.27473047375679016,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.2564102564102564,
43
- "grad_norm": 61.070639184089565,
44
  "learning_rate": 4.142857142857143e-07,
45
- "logits/generated": -1.0700271129608154,
46
- "logits/real": -0.7057520747184753,
47
- "logps/generated": -261.4720153808594,
48
- "logps/real": -257.3920593261719,
49
- "loss": 0.7717,
50
- "rewards/accuracies": 0.75,
51
- "rewards/generated": 0.8906627893447876,
52
- "rewards/margins": 0.7612454295158386,
53
- "rewards/real": 1.6519081592559814,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.38461538461538464,
58
- "grad_norm": 64.32897890365741,
59
  "learning_rate": 3.4285714285714286e-07,
60
- "logits/generated": -0.9067907333374023,
61
- "logits/real": -0.659667432308197,
62
- "logps/generated": -261.84893798828125,
63
- "logps/real": -250.3210906982422,
64
- "loss": 0.7144,
65
  "rewards/accuracies": 0.75,
66
- "rewards/generated": 1.7948137521743774,
67
- "rewards/margins": 0.8462657928466797,
68
- "rewards/real": 2.6410794258117676,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.5128205128205128,
73
- "grad_norm": 164.54242317315587,
74
  "learning_rate": 2.714285714285714e-07,
75
- "logits/generated": -0.9878751039505005,
76
- "logits/real": -0.6697134375572205,
77
- "logps/generated": -254.94430541992188,
78
- "logps/real": -251.97024536132812,
79
- "loss": 0.7095,
80
- "rewards/accuracies": 0.762499988079071,
81
- "rewards/generated": 1.6531692743301392,
82
- "rewards/margins": 1.1178901195526123,
83
- "rewards/real": 2.771059513092041,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.6410256410256411,
88
- "grad_norm": 65.89825293961819,
89
  "learning_rate": 2e-07,
90
- "logits/generated": -0.9043434262275696,
91
- "logits/real": -0.767203152179718,
92
- "logps/generated": -254.6318817138672,
93
- "logps/real": -240.71481323242188,
94
- "loss": 0.6938,
95
- "rewards/accuracies": 0.7875000238418579,
96
- "rewards/generated": 2.046715497970581,
97
- "rewards/margins": 0.7744835615158081,
98
- "rewards/real": 2.8211989402770996,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.7692307692307693,
103
- "grad_norm": 124.30731613732516,
104
  "learning_rate": 1.2857142857142855e-07,
105
- "logits/generated": -1.0582646131515503,
106
- "logits/real": -0.6999293565750122,
107
- "logps/generated": -245.7989044189453,
108
- "logps/real": -248.5264129638672,
109
- "loss": 0.7132,
110
  "rewards/accuracies": 0.8500000238418579,
111
- "rewards/generated": 1.8123178482055664,
112
- "rewards/margins": 1.1895701885223389,
113
- "rewards/real": 3.0018882751464844,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.8974358974358975,
118
- "grad_norm": 65.42190072046155,
119
  "learning_rate": 5.714285714285714e-08,
120
- "logits/generated": -1.1064507961273193,
121
- "logits/real": -0.7838689684867859,
122
- "logps/generated": -245.507568359375,
123
- "logps/real": -237.945068359375,
124
- "loss": 0.7122,
125
- "rewards/accuracies": 0.75,
126
- "rewards/generated": 1.7316808700561523,
127
- "rewards/margins": 1.1938471794128418,
128
- "rewards/real": 2.925528049468994,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.0,
133
  "step": 78,
134
  "total_flos": 0.0,
135
- "train_loss": 0.7470545699963203,
136
- "train_runtime": 930.2413,
137
- "train_samples_per_second": 2.67,
138
- "train_steps_per_second": 0.084
139
  }
140
  ],
141
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.01282051282051282,
13
+ "grad_norm": 101.98915917164486,
14
  "learning_rate": 6.25e-08,
15
  "logits/generated": -1.0679720640182495,
16
  "logits/real": -0.826486349105835,
 
25
  },
26
  {
27
  "epoch": 0.1282051282051282,
28
+ "grad_norm": 80.52500100713164,
29
  "learning_rate": 4.857142857142857e-07,
30
+ "logits/generated": -0.8668110966682434,
31
+ "logits/real": -0.644612193107605,
32
+ "logps/generated": -273.2930908203125,
33
+ "logps/real": -273.6876220703125,
34
+ "loss": 0.907,
35
+ "rewards/accuracies": 0.5277777910232544,
36
+ "rewards/generated": 0.20040562748908997,
37
+ "rewards/margins": 0.11361115425825119,
38
+ "rewards/real": 0.31401681900024414,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.2564102564102564,
43
+ "grad_norm": 61.621972969126524,
44
  "learning_rate": 4.142857142857143e-07,
45
+ "logits/generated": -1.0656638145446777,
46
+ "logits/real": -0.7032974362373352,
47
+ "logps/generated": -260.37921142578125,
48
+ "logps/real": -255.3711395263672,
49
+ "loss": 0.7653,
50
+ "rewards/accuracies": 0.737500011920929,
51
+ "rewards/generated": 0.9999414682388306,
52
+ "rewards/margins": 0.8540604710578918,
53
+ "rewards/real": 1.8540016412734985,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.38461538461538464,
58
+ "grad_norm": 59.21144986605617,
59
  "learning_rate": 3.4285714285714286e-07,
60
+ "logits/generated": -0.9283930063247681,
61
+ "logits/real": -0.6714473962783813,
62
+ "logps/generated": -261.3749084472656,
63
+ "logps/real": -249.65054321289062,
64
+ "loss": 0.7105,
65
  "rewards/accuracies": 0.75,
66
+ "rewards/generated": 1.842218041419983,
67
+ "rewards/margins": 0.8659162521362305,
68
+ "rewards/real": 2.708134412765503,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.5128205128205128,
73
+ "grad_norm": 62.53453913007679,
74
  "learning_rate": 2.714285714285714e-07,
75
+ "logits/generated": -0.9731477499008179,
76
+ "logits/real": -0.6548992395401001,
77
+ "logps/generated": -253.58157348632812,
78
+ "logps/real": -250.48837280273438,
79
+ "loss": 0.7065,
80
+ "rewards/accuracies": 0.737500011920929,
81
+ "rewards/generated": 1.789442777633667,
82
+ "rewards/margins": 1.1298000812530518,
83
+ "rewards/real": 2.919243097305298,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.6410256410256411,
88
+ "grad_norm": 63.255829698141724,
89
  "learning_rate": 2e-07,
90
+ "logits/generated": -0.8815720677375793,
91
+ "logits/real": -0.7497758269309998,
92
+ "logps/generated": -253.22402954101562,
93
+ "logps/real": -238.9423370361328,
94
+ "loss": 0.6918,
95
+ "rewards/accuracies": 0.800000011920929,
96
+ "rewards/generated": 2.1875,
97
+ "rewards/margins": 0.8109487295150757,
98
+ "rewards/real": 2.998448610305786,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.7692307692307693,
103
+ "grad_norm": 77.61305253400987,
104
  "learning_rate": 1.2857142857142855e-07,
105
+ "logits/generated": -1.0364863872528076,
106
+ "logits/real": -0.6802612543106079,
107
+ "logps/generated": -244.5989227294922,
108
+ "logps/real": -247.1646270751953,
109
+ "loss": 0.7152,
110
  "rewards/accuracies": 0.8500000238418579,
111
+ "rewards/generated": 1.9323114156723022,
112
+ "rewards/margins": 1.2057563066482544,
113
+ "rewards/real": 3.1380679607391357,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.8974358974358975,
118
+ "grad_norm": 53.32977807319664,
119
  "learning_rate": 5.714285714285714e-08,
120
+ "logits/generated": -1.086891531944275,
121
+ "logits/real": -0.7703801989555359,
122
+ "logps/generated": -244.48086547851562,
123
+ "logps/real": -236.3184814453125,
124
+ "loss": 0.7159,
125
+ "rewards/accuracies": 0.7749999761581421,
126
+ "rewards/generated": 1.834351897239685,
127
+ "rewards/margins": 1.2538334131240845,
128
+ "rewards/real": 3.0881857872009277,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.0,
133
  "step": 78,
134
  "total_flos": 0.0,
135
+ "train_loss": 0.7437324760816036,
136
+ "train_runtime": 1103.4698,
137
+ "train_samples_per_second": 2.251,
138
+ "train_steps_per_second": 0.071
139
  }
140
  ],
141
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbda660047aab9616bdbc58c392057e52f11f679c812a40158afebcfae515e2e
3
  size 6392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21ea6eb09e073776a07bcf698fec99f405955d2f050aac5a20b0120d2d119aaf
3
  size 6392