AmberYifan commited on
Commit
bc92ee5
·
verified ·
1 Parent(s): 1f6ccba

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7046444348750576,
5
- "train_runtime": 776.543,
6
  "train_samples": 1995,
7
- "train_samples_per_second": 2.569,
8
- "train_steps_per_second": 0.08
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.704919635288177,
5
+ "train_runtime": 933.9268,
6
  "train_samples": 1995,
7
+ "train_samples_per_second": 2.136,
8
+ "train_steps_per_second": 0.066
9
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6acd4f2edc1f7730dafd2f0b9084c16a3a0adcc2faa46a35335dc6252f085a4
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f49dbd2f2d490ba057516e1f9306a55e7c0637326b6e6be86d5f53b14b3637ef
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ee8e2dbcd414ebb8630e4c1ac5e7209033b2fb3b34df06198dbc64ed9183779
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29c45de9a29dc8ec25c25e96284e78b3e97d2a2f19fe77e883d231a533d5e778
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2baca308706598f8809344bb46fd2112c3cb080db9f195f72ff72f75a008a344
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb80f49591f876daed95ba28de7d10b23c200dc9f039b6e0b5322191f23bcd8
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee80d907122ea50cc20eeb4dbc840beb40dde7400013b1739ebd60fcb7036307
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74edcb8c5e932ce550f5f8b73c7604a2aff2e5083b38fdfdd724c151c8b6e109
3
  size 1089994880
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7046444348750576,
5
- "train_runtime": 776.543,
6
  "train_samples": 1995,
7
- "train_samples_per_second": 2.569,
8
- "train_steps_per_second": 0.08
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.704919635288177,
5
+ "train_runtime": 933.9268,
6
  "train_samples": 1995,
7
+ "train_samples_per_second": 2.136,
8
+ "train_steps_per_second": 0.066
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
- "grad_norm": 50.82963308997749,
14
  "learning_rate": 7.142857142857142e-08,
15
  "logits/generated": -1.7165532112121582,
16
  "logits/real": -1.5090866088867188,
@@ -25,102 +25,102 @@
25
  },
26
  {
27
  "epoch": 0.16,
28
- "grad_norm": 41.3948314326989,
29
  "learning_rate": 4.727272727272727e-07,
30
- "logits/generated": -1.8417009115219116,
31
- "logits/real": -1.5857137441635132,
32
- "logps/generated": -281.29766845703125,
33
- "logps/real": -260.3644104003906,
34
- "loss": 0.9043,
35
- "rewards/accuracies": 0.5416666865348816,
36
- "rewards/generated": 0.23805660009384155,
37
- "rewards/margins": 0.10852599143981934,
38
- "rewards/real": 0.3465825915336609,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
- "grad_norm": 35.46442576215001,
44
  "learning_rate": 3.818181818181818e-07,
45
- "logits/generated": -1.7797447443008423,
46
- "logits/real": -1.4822289943695068,
47
- "logps/generated": -265.62164306640625,
48
- "logps/real": -228.3395233154297,
49
- "loss": 0.7337,
50
- "rewards/accuracies": 0.824999988079071,
51
- "rewards/generated": 1.8866024017333984,
52
- "rewards/margins": 0.8881384134292603,
53
- "rewards/real": 2.7747409343719482,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
- "grad_norm": 34.49996220301337,
59
  "learning_rate": 2.909090909090909e-07,
60
- "logits/generated": -1.678046464920044,
61
- "logits/real": -1.3650352954864502,
62
- "logps/generated": -255.2372589111328,
63
- "logps/real": -221.446044921875,
64
- "loss": 0.6873,
65
  "rewards/accuracies": 0.7250000238418579,
66
- "rewards/generated": 3.282224178314209,
67
- "rewards/margins": 0.8962146639823914,
68
- "rewards/real": 4.178439140319824,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
- "grad_norm": 31.3654281108449,
74
  "learning_rate": 2e-07,
75
- "logits/generated": -1.5494762659072876,
76
- "logits/real": -1.2621484994888306,
77
- "logps/generated": -233.6564483642578,
78
- "logps/real": -204.22552490234375,
79
- "loss": 0.6527,
80
- "rewards/accuracies": 0.7875000238418579,
81
- "rewards/generated": 4.193800926208496,
82
- "rewards/margins": 1.0007737874984741,
83
- "rewards/real": 5.19457483291626,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
- "grad_norm": 32.94242474259375,
89
  "learning_rate": 1.0909090909090908e-07,
90
- "logits/generated": -1.5461094379425049,
91
- "logits/real": -1.345146894454956,
92
- "logps/generated": -235.3571319580078,
93
- "logps/real": -204.53231811523438,
94
- "loss": 0.6424,
95
  "rewards/accuracies": 0.75,
96
- "rewards/generated": 4.165228843688965,
97
- "rewards/margins": 1.0194575786590576,
98
- "rewards/real": 5.184686183929443,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
- "grad_norm": 35.41079003724441,
104
  "learning_rate": 1.818181818181818e-08,
105
- "logits/generated": -1.635496735572815,
106
- "logits/real": -1.3326714038848877,
107
- "logps/generated": -236.8266143798828,
108
- "logps/real": -199.64089965820312,
109
- "loss": 0.6352,
110
  "rewards/accuracies": 0.800000011920929,
111
- "rewards/generated": 4.119868278503418,
112
- "rewards/margins": 1.221605658531189,
113
- "rewards/real": 5.341473579406738,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
- "train_loss": 0.7046444348750576,
121
- "train_runtime": 776.543,
122
- "train_samples_per_second": 2.569,
123
- "train_steps_per_second": 0.08
124
  }
125
  ],
126
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
+ "grad_norm": 50.835563259148245,
14
  "learning_rate": 7.142857142857142e-08,
15
  "logits/generated": -1.7165532112121582,
16
  "logits/real": -1.5090866088867188,
 
25
  },
26
  {
27
  "epoch": 0.16,
28
+ "grad_norm": 41.66364421784157,
29
  "learning_rate": 4.727272727272727e-07,
30
+ "logits/generated": -1.8415862321853638,
31
+ "logits/real": -1.5861574411392212,
32
+ "logps/generated": -281.33892822265625,
33
+ "logps/real": -260.4600830078125,
34
+ "loss": 0.9056,
35
+ "rewards/accuracies": 0.5138888955116272,
36
+ "rewards/generated": 0.23393188416957855,
37
+ "rewards/margins": 0.10307849943637848,
38
+ "rewards/real": 0.33701038360595703,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
+ "grad_norm": 35.24058113049003,
44
  "learning_rate": 3.818181818181818e-07,
45
+ "logits/generated": -1.7791579961776733,
46
+ "logits/real": -1.480543613433838,
47
+ "logps/generated": -265.5545959472656,
48
+ "logps/real": -228.2611083984375,
49
+ "loss": 0.7334,
50
+ "rewards/accuracies": 0.8374999761581421,
51
+ "rewards/generated": 1.893307089805603,
52
+ "rewards/margins": 0.8892760276794434,
53
+ "rewards/real": 2.782582998275757,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
+ "grad_norm": 34.94460500011804,
59
  "learning_rate": 2.909090909090909e-07,
60
+ "logits/generated": -1.688865303993225,
61
+ "logits/real": -1.3790943622589111,
62
+ "logps/generated": -254.9552764892578,
63
+ "logps/real": -221.18008422851562,
64
+ "loss": 0.6877,
65
  "rewards/accuracies": 0.7250000238418579,
66
+ "rewards/generated": 3.3104233741760254,
67
+ "rewards/margins": 0.8946127891540527,
68
+ "rewards/real": 4.205036163330078,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
+ "grad_norm": 31.70597635761484,
74
  "learning_rate": 2e-07,
75
+ "logits/generated": -1.5597318410873413,
76
+ "logits/real": -1.2736680507659912,
77
+ "logps/generated": -233.81716918945312,
78
+ "logps/real": -204.30459594726562,
79
+ "loss": 0.6532,
80
+ "rewards/accuracies": 0.7749999761581421,
81
+ "rewards/generated": 4.177728176116943,
82
+ "rewards/margins": 1.0089408159255981,
83
+ "rewards/real": 5.18666934967041,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
+ "grad_norm": 33.862934848228804,
89
  "learning_rate": 1.0909090909090908e-07,
90
+ "logits/generated": -1.547353982925415,
91
+ "logits/real": -1.3472106456756592,
92
+ "logps/generated": -235.5671844482422,
93
+ "logps/real": -204.73304748535156,
94
+ "loss": 0.6417,
95
  "rewards/accuracies": 0.75,
96
+ "rewards/generated": 4.144225597381592,
97
+ "rewards/margins": 1.0203888416290283,
98
+ "rewards/real": 5.164614677429199,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
+ "grad_norm": 35.459665070843094,
104
  "learning_rate": 1.818181818181818e-08,
105
+ "logits/generated": -1.6352506875991821,
106
+ "logits/real": -1.3348934650421143,
107
+ "logps/generated": -237.15017700195312,
108
+ "logps/real": -200.07449340820312,
109
+ "loss": 0.6375,
110
  "rewards/accuracies": 0.800000011920929,
111
+ "rewards/generated": 4.087512969970703,
112
+ "rewards/margins": 1.2106025218963623,
113
+ "rewards/real": 5.2981157302856445,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
+ "train_loss": 0.704919635288177,
121
+ "train_runtime": 933.9268,
122
+ "train_samples_per_second": 2.136,
123
+ "train_steps_per_second": 0.066
124
  }
125
  ],
126
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63a6939c71cf543e1a6e6c96c8332c9bfcb5a06ec0c8ed9721adf0d5a899c491
3
  size 6392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381d74d8c9ecba27e99d67d9e76341be694008be74d7d18d61299f7e09f15680
3
  size 6392