yihang7 committed
Commit 82e938b
1 Parent(s): 63e498e

Model save

README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.9587
+- Loss: 0.9586
 
 ## Model description
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
 {
     "epoch": 0.67,
-    "eval_loss": 0.9586901068687439,
-    "eval_runtime": 338.7099,
+    "eval_loss": 0.9586499929428101,
+    "eval_runtime": 333.321,
     "eval_samples": 23110,
-    "eval_samples_per_second": 68.229,
-    "eval_steps_per_second": 0.534,
-    "train_loss": 1.0367616415023804,
-    "train_runtime": 11588.1271,
+    "eval_samples_per_second": 69.333,
+    "eval_steps_per_second": 0.543,
+    "train_loss": 1.0367797762155533,
+    "train_runtime": 11462.4547,
     "train_samples": 207865,
-    "train_samples_per_second": 17.938,
+    "train_samples_per_second": 18.134,
     "train_steps_per_second": 0.035
 }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 0.67,
-    "eval_loss": 0.9586901068687439,
-    "eval_runtime": 338.7099,
+    "eval_loss": 0.9586499929428101,
+    "eval_runtime": 333.321,
     "eval_samples": 23110,
-    "eval_samples_per_second": 68.229,
-    "eval_steps_per_second": 0.534
+    "eval_samples_per_second": 69.333,
+    "eval_steps_per_second": 0.543
 }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29f64ccddda6c0fcd585f1e3182871190794b27f7694a67f641bafb359f3a16a
+oid sha256:d133ee02314c0c032b252de99f23524a5b1e553b206a82dd637a401291e97b9a
 size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90d6bc73445642011681757914f7bf0c2c6a0e5de00e70cae2ee87d8edc1b89b
+oid sha256:32944870c023c11e648111e265a622c622e5d7fc68d7e413a56c90fe00f6cbb4
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3bac1a48a392f6a99fd3b9299797ab34813b60a6ef02afd7c5ea9ad8f99d539
+oid sha256:58d8268cebc9504100d8e9c416cb87910a645a4212ad05f790e3bf7ccc14950a
 size 4540516344
runs/Jan23_19-21-30_amaterasu/events.out.tfevents.1706037734.amaterasu.3311.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b6c9ddece4b8ba39591bc343de35f36636aadd5041c64d67c697e16bac8c31f
+size 13411
runs/Jan23_19-21-30_amaterasu/events.out.tfevents.1706049529.amaterasu.3311.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:660a2700d5790599234bd131f6d88a772664268a392f92ac8a3aaca88720fd2f
+size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 0.67,
-    "train_loss": 1.0367616415023804,
-    "train_runtime": 11588.1271,
+    "train_loss": 1.0367797762155533,
+    "train_runtime": 11462.4547,
     "train_samples": 207865,
-    "train_samples_per_second": 17.938,
+    "train_samples_per_second": 18.134,
     "train_steps_per_second": 0.035
 }
trainer_state.json CHANGED
@@ -17,37 +17,37 @@
 {
     "epoch": 0.01,
     "learning_rate": 1.999251652147735e-05,
-    "loss": 2.616,
+    "loss": 2.6184,
     "step": 5
 },
 {
     "epoch": 0.02,
     "learning_rate": 1.997007728639956e-05,
-    "loss": 1.5607,
+    "loss": 1.5618,
     "step": 10
 },
 {
     "epoch": 0.04,
     "learning_rate": 1.9932715879473385e-05,
-    "loss": 1.2901,
+    "loss": 1.2898,
     "step": 15
 },
 {
     "epoch": 0.05,
     "learning_rate": 1.9880488219356086e-05,
-    "loss": 1.2126,
+    "loss": 1.2119,
     "step": 20
 },
 {
     "epoch": 0.06,
     "learning_rate": 1.981347247496222e-05,
-    "loss": 1.1381,
+    "loss": 1.138,
     "step": 25
 },
 {
     "epoch": 0.07,
     "learning_rate": 1.973176894846855e-05,
-    "loss": 1.1002,
+    "loss": 1.1003,
     "step": 30
 },
 {
@@ -59,13 +59,13 @@
 {
     "epoch": 0.1,
     "learning_rate": 1.9524809490566878e-05,
-    "loss": 1.0567,
+    "loss": 1.0568,
     "step": 40
 },
 {
     "epoch": 0.11,
     "learning_rate": 1.939986331449053e-05,
-    "loss": 1.0453,
+    "loss": 1.0454,
     "step": 45
 },
 {
@@ -77,7 +77,7 @@
 {
     "epoch": 0.14,
     "learning_rate": 1.910797282022027e-05,
-    "loss": 1.0293,
+    "loss": 1.0294,
     "step": 55
 },
 {
@@ -89,7 +89,7 @@
 {
     "epoch": 0.16,
     "learning_rate": 1.8761575273514005e-05,
-    "loss": 1.0082,
+    "loss": 1.0083,
     "step": 65
 },
 {
@@ -101,13 +101,13 @@
 {
     "epoch": 0.18,
     "learning_rate": 1.8362743705288127e-05,
-    "loss": 1.002,
+    "loss": 1.0021,
     "step": 75
 },
 {
     "epoch": 0.2,
     "learning_rate": 1.814439916631857e-05,
-    "loss": 1.0086,
+    "loss": 1.0087,
     "step": 80
 },
 {
@@ -119,7 +119,7 @@
 {
     "epoch": 0.22,
     "learning_rate": 1.7671486066220965e-05,
-    "loss": 0.9918,
+    "loss": 0.9919,
     "step": 90
 },
 {
@@ -137,7 +137,7 @@
 {
     "epoch": 0.26,
     "learning_rate": 1.6876994588534234e-05,
-    "loss": 1.0055,
+    "loss": 1.0056,
     "step": 105
 },
 {
@@ -149,13 +149,13 @@
 {
     "epoch": 0.28,
     "learning_rate": 1.629520819706912e-05,
-    "loss": 0.978,
+    "loss": 0.9781,
     "step": 115
 },
 {
     "epoch": 0.3,
     "learning_rate": 1.598996060429634e-05,
-    "loss": 0.9822,
+    "loss": 0.9823,
     "step": 120
 },
 {
@@ -173,13 +173,13 @@
 {
     "epoch": 0.33,
     "learning_rate": 1.5022320773808612e-05,
-    "loss": 0.9865,
+    "loss": 0.9866,
     "step": 135
 },
 {
     "epoch": 0.34,
     "learning_rate": 1.4684084406997903e-05,
-    "loss": 0.9718,
+    "loss": 0.9719,
     "step": 140
 },
 {
@@ -191,7 +191,7 @@
 {
     "epoch": 0.37,
     "learning_rate": 1.3987096456067236e-05,
-    "loss": 0.9725,
+    "loss": 0.9726,
     "step": 150
 },
 {
@@ -221,49 +221,49 @@
 {
     "epoch": 0.43,
     "learning_rate": 1.2149704402110243e-05,
-    "loss": 0.9589,
+    "loss": 0.9588,
     "step": 175
 },
 {
     "epoch": 0.44,
     "learning_rate": 1.1770339692844484e-05,
-    "loss": 0.9654,
+    "loss": 0.9652,
     "step": 180
 },
 {
     "epoch": 0.46,
     "learning_rate": 1.1388325323764889e-05,
-    "loss": 0.9746,
+    "loss": 0.9742,
     "step": 185
 },
 {
     "epoch": 0.47,
     "learning_rate": 1.1004233054136726e-05,
-    "loss": 0.9681,
+    "loss": 0.9679,
     "step": 190
 },
 {
     "epoch": 0.48,
     "learning_rate": 1.0618637753210086e-05,
-    "loss": 0.9692,
+    "loss": 0.969,
     "step": 195
 },
 {
     "epoch": 0.49,
     "learning_rate": 1.0232116539815558e-05,
-    "loss": 0.9589,
+    "loss": 0.9587,
     "step": 200
 },
 {
     "epoch": 0.5,
     "learning_rate": 9.845247918592937e-06,
-    "loss": 0.9628,
+    "loss": 0.9627,
     "step": 205
 },
 {
     "epoch": 0.52,
     "learning_rate": 9.458610914145826e-06,
-    "loss": 0.9613,
+    "loss": 0.9612,
     "step": 210
 },
 {
@@ -275,25 +275,25 @@
 {
     "epoch": 0.54,
     "learning_rate": 8.688345254588579e-06,
-    "loss": 0.961,
+    "loss": 0.9609,
     "step": 220
 },
 {
     "epoch": 0.55,
     "learning_rate": 8.305869452782446e-06,
-    "loss": 0.9638,
+    "loss": 0.9637,
     "step": 225
 },
 {
     "epoch": 0.57,
     "learning_rate": 7.92592924888925e-06,
-    "loss": 0.9621,
+    "loss": 0.962,
     "step": 230
 },
 {
     "epoch": 0.58,
     "learning_rate": 7.549093297780133e-06,
-    "loss": 0.959,
+    "loss": 0.9589,
     "step": 235
 },
 {
@@ -311,7 +311,7 @@
 {
     "epoch": 0.62,
     "learning_rate": 6.442822761362015e-06,
-    "loss": 0.9541,
+    "loss": 0.954,
     "step": 250
 },
 {
@@ -323,13 +323,13 @@
 {
     "epoch": 0.64,
     "learning_rate": 5.731007993667155e-06,
-    "loss": 0.9633,
+    "loss": 0.9632,
     "step": 260
 },
 {
     "epoch": 0.65,
     "learning_rate": 5.38442053335571e-06,
-    "loss": 0.9527,
+    "loss": 0.9526,
     "step": 265
 },
 {
@@ -340,19 +340,19 @@
 },
 {
     "epoch": 0.67,
-    "eval_loss": 0.9585356712341309,
-    "eval_runtime": 338.2538,
-    "eval_samples_per_second": 68.321,
-    "eval_steps_per_second": 0.535,
+    "eval_loss": 0.9584953188896179,
+    "eval_runtime": 333.8493,
+    "eval_samples_per_second": 69.223,
+    "eval_steps_per_second": 0.542,
     "step": 272
 },
 {
     "epoch": 0.67,
     "step": 272,
     "total_flos": 455322233733120.0,
-    "train_loss": 1.0367616415023804,
-    "train_runtime": 11588.1271,
-    "train_samples_per_second": 17.938,
+    "train_loss": 1.0367797762155533,
+    "train_runtime": 11462.4547,
+    "train_samples_per_second": 18.134,
     "train_steps_per_second": 0.035
 }
 ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:865b2c04678854723a48a676747a698c10682157b8e6f3fe4dbf271aadd875c0
+oid sha256:f2055e8bf9addc42b7592d7e584bd3ce9cbd7b433a56701bd779bd50ab33700e
 size 5624
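
The model shards, event files, and training_args.bin in this commit are Git LFS objects, so the diff above only shows their pointer fields (version, oid sha256, size). As a minimal sketch, assuming the safetensors shards have already been pulled into the current working directory (the paths and the helper below are illustrative, not part of the repository), the updated pointers can be checked against the local files like this:

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex SHA-256, the value an LFS pointer records after `oid sha256:`."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oids copied from the updated pointers in this commit.
expected = {
    "model-00001-of-00003.safetensors": "d133ee02314c0c032b252de99f23524a5b1e553b206a82dd637a401291e97b9a",
    "model-00002-of-00003.safetensors": "32944870c023c11e648111e265a622c622e5d7fc68d7e413a56c90fe00f6cbb4",
    "model-00003-of-00003.safetensors": "58d8268cebc9504100d8e9c416cb87910a645a4212ad05f790e3bf7ccc14950a",
}

for name, oid in expected.items():
    path = Path(name)  # assumes the shard was downloaded into the working directory
    if path.exists():
        print(name, "OK" if sha256_of(path) == oid else "MISMATCH")
    else:
        print(name, "missing (still an LFS pointer or not downloaded)")
```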